sqlglot.parser
1from __future__ import annotations 2 3import logging 4import typing as t 5from collections import defaultdict 6 7from sqlglot import exp 8from sqlglot.errors import ErrorLevel, ParseError, concat_messages, merge_errors 9from sqlglot.helper import apply_index_offset, ensure_list, seq_get 10from sqlglot.time import format_time 11from sqlglot.tokens import Token, Tokenizer, TokenType 12from sqlglot.trie import in_trie, new_trie 13 14if t.TYPE_CHECKING: 15 from sqlglot._typing import E 16 17logger = logging.getLogger("sqlglot") 18 19 20def parse_var_map(args: t.List) -> exp.StarMap | exp.VarMap: 21 if len(args) == 1 and args[0].is_star: 22 return exp.StarMap(this=args[0]) 23 24 keys = [] 25 values = [] 26 for i in range(0, len(args), 2): 27 keys.append(args[i]) 28 values.append(args[i + 1]) 29 30 return exp.VarMap( 31 keys=exp.Array(expressions=keys), 32 values=exp.Array(expressions=values), 33 ) 34 35 36def parse_like(args: t.List) -> exp.Escape | exp.Like: 37 like = exp.Like(this=seq_get(args, 1), expression=seq_get(args, 0)) 38 return exp.Escape(this=like, expression=seq_get(args, 2)) if len(args) > 2 else like 39 40 41def binary_range_parser( 42 expr_type: t.Type[exp.Expression], 43) -> t.Callable[[Parser, t.Optional[exp.Expression]], t.Optional[exp.Expression]]: 44 return lambda self, this: self._parse_escape( 45 self.expression(expr_type, this=this, expression=self._parse_bitwise()) 46 ) 47 48 49class _Parser(type): 50 def __new__(cls, clsname, bases, attrs): 51 klass = super().__new__(cls, clsname, bases, attrs) 52 53 klass.SHOW_TRIE = new_trie(key.split(" ") for key in klass.SHOW_PARSERS) 54 klass.SET_TRIE = new_trie(key.split(" ") for key in klass.SET_PARSERS) 55 56 return klass 57 58 59class Parser(metaclass=_Parser): 60 """ 61 Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree. 62 63 Args: 64 error_level: The desired error level. 
65 Default: ErrorLevel.IMMEDIATE 66 error_message_context: Determines the amount of context to capture from a 67 query string when displaying the error message (in number of characters). 68 Default: 100 69 max_errors: Maximum number of error messages to include in a raised ParseError. 70 This is only relevant if error_level is ErrorLevel.RAISE. 71 Default: 3 72 """ 73 74 FUNCTIONS: t.Dict[str, t.Callable] = { 75 **{name: f.from_arg_list for f in exp.ALL_FUNCTIONS for name in f.sql_names()}, 76 "DATE_TO_DATE_STR": lambda args: exp.Cast( 77 this=seq_get(args, 0), 78 to=exp.DataType(this=exp.DataType.Type.TEXT), 79 ), 80 "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)), 81 "LIKE": parse_like, 82 "TIME_TO_TIME_STR": lambda args: exp.Cast( 83 this=seq_get(args, 0), 84 to=exp.DataType(this=exp.DataType.Type.TEXT), 85 ), 86 "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring( 87 this=exp.Cast( 88 this=seq_get(args, 0), 89 to=exp.DataType(this=exp.DataType.Type.TEXT), 90 ), 91 start=exp.Literal.number(1), 92 length=exp.Literal.number(10), 93 ), 94 "VAR_MAP": parse_var_map, 95 } 96 97 NO_PAREN_FUNCTIONS = { 98 TokenType.CURRENT_DATE: exp.CurrentDate, 99 TokenType.CURRENT_DATETIME: exp.CurrentDate, 100 TokenType.CURRENT_TIME: exp.CurrentTime, 101 TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp, 102 TokenType.CURRENT_USER: exp.CurrentUser, 103 } 104 105 NESTED_TYPE_TOKENS = { 106 TokenType.ARRAY, 107 TokenType.MAP, 108 TokenType.NULLABLE, 109 TokenType.STRUCT, 110 } 111 112 TYPE_TOKENS = { 113 TokenType.BIT, 114 TokenType.BOOLEAN, 115 TokenType.TINYINT, 116 TokenType.UTINYINT, 117 TokenType.SMALLINT, 118 TokenType.USMALLINT, 119 TokenType.INT, 120 TokenType.UINT, 121 TokenType.BIGINT, 122 TokenType.UBIGINT, 123 TokenType.INT128, 124 TokenType.UINT128, 125 TokenType.INT256, 126 TokenType.UINT256, 127 TokenType.FLOAT, 128 TokenType.DOUBLE, 129 TokenType.CHAR, 130 TokenType.NCHAR, 131 TokenType.VARCHAR, 132 TokenType.NVARCHAR, 133 
TokenType.TEXT, 134 TokenType.MEDIUMTEXT, 135 TokenType.LONGTEXT, 136 TokenType.MEDIUMBLOB, 137 TokenType.LONGBLOB, 138 TokenType.BINARY, 139 TokenType.VARBINARY, 140 TokenType.JSON, 141 TokenType.JSONB, 142 TokenType.INTERVAL, 143 TokenType.TIME, 144 TokenType.TIMESTAMP, 145 TokenType.TIMESTAMPTZ, 146 TokenType.TIMESTAMPLTZ, 147 TokenType.DATETIME, 148 TokenType.DATETIME64, 149 TokenType.DATE, 150 TokenType.INT4RANGE, 151 TokenType.INT4MULTIRANGE, 152 TokenType.INT8RANGE, 153 TokenType.INT8MULTIRANGE, 154 TokenType.NUMRANGE, 155 TokenType.NUMMULTIRANGE, 156 TokenType.TSRANGE, 157 TokenType.TSMULTIRANGE, 158 TokenType.TSTZRANGE, 159 TokenType.TSTZMULTIRANGE, 160 TokenType.DATERANGE, 161 TokenType.DATEMULTIRANGE, 162 TokenType.DECIMAL, 163 TokenType.BIGDECIMAL, 164 TokenType.UUID, 165 TokenType.GEOGRAPHY, 166 TokenType.GEOMETRY, 167 TokenType.HLLSKETCH, 168 TokenType.HSTORE, 169 TokenType.PSEUDO_TYPE, 170 TokenType.SUPER, 171 TokenType.SERIAL, 172 TokenType.SMALLSERIAL, 173 TokenType.BIGSERIAL, 174 TokenType.XML, 175 TokenType.UNIQUEIDENTIFIER, 176 TokenType.MONEY, 177 TokenType.SMALLMONEY, 178 TokenType.ROWVERSION, 179 TokenType.IMAGE, 180 TokenType.VARIANT, 181 TokenType.OBJECT, 182 TokenType.INET, 183 *NESTED_TYPE_TOKENS, 184 } 185 186 SUBQUERY_PREDICATES = { 187 TokenType.ANY: exp.Any, 188 TokenType.ALL: exp.All, 189 TokenType.EXISTS: exp.Exists, 190 TokenType.SOME: exp.Any, 191 } 192 193 RESERVED_KEYWORDS = { 194 *Tokenizer.SINGLE_TOKENS.values(), 195 TokenType.SELECT, 196 } 197 198 DB_CREATABLES = { 199 TokenType.DATABASE, 200 TokenType.SCHEMA, 201 TokenType.TABLE, 202 TokenType.VIEW, 203 TokenType.DICTIONARY, 204 } 205 206 CREATABLES = { 207 TokenType.COLUMN, 208 TokenType.FUNCTION, 209 TokenType.INDEX, 210 TokenType.PROCEDURE, 211 *DB_CREATABLES, 212 } 213 214 # Tokens that can represent identifiers 215 ID_VAR_TOKENS = { 216 TokenType.VAR, 217 TokenType.ANTI, 218 TokenType.APPLY, 219 TokenType.ASC, 220 TokenType.AUTO_INCREMENT, 221 TokenType.BEGIN, 222 
TokenType.CACHE, 223 TokenType.CASE, 224 TokenType.COLLATE, 225 TokenType.COMMAND, 226 TokenType.COMMENT, 227 TokenType.COMMIT, 228 TokenType.CONSTRAINT, 229 TokenType.DEFAULT, 230 TokenType.DELETE, 231 TokenType.DESC, 232 TokenType.DESCRIBE, 233 TokenType.DICTIONARY, 234 TokenType.DIV, 235 TokenType.END, 236 TokenType.EXECUTE, 237 TokenType.ESCAPE, 238 TokenType.FALSE, 239 TokenType.FIRST, 240 TokenType.FILTER, 241 TokenType.FORMAT, 242 TokenType.FULL, 243 TokenType.IF, 244 TokenType.IS, 245 TokenType.ISNULL, 246 TokenType.INTERVAL, 247 TokenType.KEEP, 248 TokenType.LEFT, 249 TokenType.LOAD, 250 TokenType.MERGE, 251 TokenType.NATURAL, 252 TokenType.NEXT, 253 TokenType.OFFSET, 254 TokenType.ORDINALITY, 255 TokenType.OVERWRITE, 256 TokenType.PARTITION, 257 TokenType.PERCENT, 258 TokenType.PIVOT, 259 TokenType.PRAGMA, 260 TokenType.RANGE, 261 TokenType.REFERENCES, 262 TokenType.RIGHT, 263 TokenType.ROW, 264 TokenType.ROWS, 265 TokenType.SEMI, 266 TokenType.SET, 267 TokenType.SETTINGS, 268 TokenType.SHOW, 269 TokenType.TEMPORARY, 270 TokenType.TOP, 271 TokenType.TRUE, 272 TokenType.UNIQUE, 273 TokenType.UNPIVOT, 274 TokenType.UPDATE, 275 TokenType.VOLATILE, 276 TokenType.WINDOW, 277 *CREATABLES, 278 *SUBQUERY_PREDICATES, 279 *TYPE_TOKENS, 280 *NO_PAREN_FUNCTIONS, 281 } 282 283 INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END} 284 285 TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - { 286 TokenType.APPLY, 287 TokenType.ASOF, 288 TokenType.FULL, 289 TokenType.LEFT, 290 TokenType.LOCK, 291 TokenType.NATURAL, 292 TokenType.OFFSET, 293 TokenType.RIGHT, 294 TokenType.WINDOW, 295 } 296 297 COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS} 298 299 UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET} 300 301 TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"} 302 303 FUNC_TOKENS = { 304 TokenType.COMMAND, 305 TokenType.CURRENT_DATE, 306 TokenType.CURRENT_DATETIME, 307 TokenType.CURRENT_TIMESTAMP, 308 TokenType.CURRENT_TIME, 309 TokenType.CURRENT_USER, 310 TokenType.FILTER, 311 
TokenType.FIRST, 312 TokenType.FORMAT, 313 TokenType.GLOB, 314 TokenType.IDENTIFIER, 315 TokenType.INDEX, 316 TokenType.ISNULL, 317 TokenType.ILIKE, 318 TokenType.LIKE, 319 TokenType.MERGE, 320 TokenType.OFFSET, 321 TokenType.PRIMARY_KEY, 322 TokenType.RANGE, 323 TokenType.REPLACE, 324 TokenType.ROW, 325 TokenType.UNNEST, 326 TokenType.VAR, 327 TokenType.LEFT, 328 TokenType.RIGHT, 329 TokenType.DATE, 330 TokenType.DATETIME, 331 TokenType.TABLE, 332 TokenType.TIMESTAMP, 333 TokenType.TIMESTAMPTZ, 334 TokenType.WINDOW, 335 *TYPE_TOKENS, 336 *SUBQUERY_PREDICATES, 337 } 338 339 CONJUNCTION = { 340 TokenType.AND: exp.And, 341 TokenType.OR: exp.Or, 342 } 343 344 EQUALITY = { 345 TokenType.EQ: exp.EQ, 346 TokenType.NEQ: exp.NEQ, 347 TokenType.NULLSAFE_EQ: exp.NullSafeEQ, 348 } 349 350 COMPARISON = { 351 TokenType.GT: exp.GT, 352 TokenType.GTE: exp.GTE, 353 TokenType.LT: exp.LT, 354 TokenType.LTE: exp.LTE, 355 } 356 357 BITWISE = { 358 TokenType.AMP: exp.BitwiseAnd, 359 TokenType.CARET: exp.BitwiseXor, 360 TokenType.PIPE: exp.BitwiseOr, 361 TokenType.DPIPE: exp.DPipe, 362 } 363 364 TERM = { 365 TokenType.DASH: exp.Sub, 366 TokenType.PLUS: exp.Add, 367 TokenType.MOD: exp.Mod, 368 TokenType.COLLATE: exp.Collate, 369 } 370 371 FACTOR = { 372 TokenType.DIV: exp.IntDiv, 373 TokenType.LR_ARROW: exp.Distance, 374 TokenType.SLASH: exp.Div, 375 TokenType.STAR: exp.Mul, 376 } 377 378 TIMESTAMPS = { 379 TokenType.TIME, 380 TokenType.TIMESTAMP, 381 TokenType.TIMESTAMPTZ, 382 TokenType.TIMESTAMPLTZ, 383 } 384 385 SET_OPERATIONS = { 386 TokenType.UNION, 387 TokenType.INTERSECT, 388 TokenType.EXCEPT, 389 } 390 391 JOIN_METHODS = { 392 TokenType.NATURAL, 393 TokenType.ASOF, 394 } 395 396 JOIN_SIDES = { 397 TokenType.LEFT, 398 TokenType.RIGHT, 399 TokenType.FULL, 400 } 401 402 JOIN_KINDS = { 403 TokenType.INNER, 404 TokenType.OUTER, 405 TokenType.CROSS, 406 TokenType.SEMI, 407 TokenType.ANTI, 408 } 409 410 JOIN_HINTS: t.Set[str] = set() 411 412 LAMBDAS = { 413 TokenType.ARROW: lambda self, 
expressions: self.expression( 414 exp.Lambda, 415 this=self._replace_lambda( 416 self._parse_conjunction(), 417 {node.name for node in expressions}, 418 ), 419 expressions=expressions, 420 ), 421 TokenType.FARROW: lambda self, expressions: self.expression( 422 exp.Kwarg, 423 this=exp.var(expressions[0].name), 424 expression=self._parse_conjunction(), 425 ), 426 } 427 428 COLUMN_OPERATORS = { 429 TokenType.DOT: None, 430 TokenType.DCOLON: lambda self, this, to: self.expression( 431 exp.Cast if self.STRICT_CAST else exp.TryCast, 432 this=this, 433 to=to, 434 ), 435 TokenType.ARROW: lambda self, this, path: self.expression( 436 exp.JSONExtract, 437 this=this, 438 expression=path, 439 ), 440 TokenType.DARROW: lambda self, this, path: self.expression( 441 exp.JSONExtractScalar, 442 this=this, 443 expression=path, 444 ), 445 TokenType.HASH_ARROW: lambda self, this, path: self.expression( 446 exp.JSONBExtract, 447 this=this, 448 expression=path, 449 ), 450 TokenType.DHASH_ARROW: lambda self, this, path: self.expression( 451 exp.JSONBExtractScalar, 452 this=this, 453 expression=path, 454 ), 455 TokenType.PLACEHOLDER: lambda self, this, key: self.expression( 456 exp.JSONBContains, 457 this=this, 458 expression=key, 459 ), 460 } 461 462 EXPRESSION_PARSERS = { 463 exp.Cluster: lambda self: self._parse_sort(exp.Cluster, "CLUSTER", "BY"), 464 exp.Column: lambda self: self._parse_column(), 465 exp.Condition: lambda self: self._parse_conjunction(), 466 exp.DataType: lambda self: self._parse_types(), 467 exp.Expression: lambda self: self._parse_statement(), 468 exp.From: lambda self: self._parse_from(), 469 exp.Group: lambda self: self._parse_group(), 470 exp.Having: lambda self: self._parse_having(), 471 exp.Identifier: lambda self: self._parse_id_var(), 472 exp.Join: lambda self: self._parse_join(), 473 exp.Lambda: lambda self: self._parse_lambda(), 474 exp.Lateral: lambda self: self._parse_lateral(), 475 exp.Limit: lambda self: self._parse_limit(), 476 exp.Offset: lambda self: 
self._parse_offset(), 477 exp.Order: lambda self: self._parse_order(), 478 exp.Ordered: lambda self: self._parse_ordered(), 479 exp.Properties: lambda self: self._parse_properties(), 480 exp.Qualify: lambda self: self._parse_qualify(), 481 exp.Returning: lambda self: self._parse_returning(), 482 exp.Sort: lambda self: self._parse_sort(exp.Sort, "SORT", "BY"), 483 exp.Table: lambda self: self._parse_table_parts(), 484 exp.TableAlias: lambda self: self._parse_table_alias(), 485 exp.Where: lambda self: self._parse_where(), 486 exp.Window: lambda self: self._parse_named_window(), 487 exp.With: lambda self: self._parse_with(), 488 "JOIN_TYPE": lambda self: self._parse_join_parts(), 489 } 490 491 STATEMENT_PARSERS = { 492 TokenType.ALTER: lambda self: self._parse_alter(), 493 TokenType.BEGIN: lambda self: self._parse_transaction(), 494 TokenType.CACHE: lambda self: self._parse_cache(), 495 TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(), 496 TokenType.COMMENT: lambda self: self._parse_comment(), 497 TokenType.CREATE: lambda self: self._parse_create(), 498 TokenType.DELETE: lambda self: self._parse_delete(), 499 TokenType.DESC: lambda self: self._parse_describe(), 500 TokenType.DESCRIBE: lambda self: self._parse_describe(), 501 TokenType.DROP: lambda self: self._parse_drop(), 502 TokenType.END: lambda self: self._parse_commit_or_rollback(), 503 TokenType.FROM: lambda self: exp.select("*").from_( 504 t.cast(exp.From, self._parse_from(skip_from_token=True)) 505 ), 506 TokenType.INSERT: lambda self: self._parse_insert(), 507 TokenType.LOAD: lambda self: self._parse_load(), 508 TokenType.MERGE: lambda self: self._parse_merge(), 509 TokenType.PIVOT: lambda self: self._parse_simplified_pivot(), 510 TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()), 511 TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(), 512 TokenType.SET: lambda self: self._parse_set(), 513 TokenType.UNCACHE: lambda self: 
self._parse_uncache(), 514 TokenType.UPDATE: lambda self: self._parse_update(), 515 TokenType.USE: lambda self: self.expression( 516 exp.Use, 517 kind=self._match_texts(("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA")) 518 and exp.var(self._prev.text), 519 this=self._parse_table(schema=False), 520 ), 521 } 522 523 UNARY_PARSERS = { 524 TokenType.PLUS: lambda self: self._parse_unary(), # Unary + is handled as a no-op 525 TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()), 526 TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()), 527 TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()), 528 } 529 530 PRIMARY_PARSERS = { 531 TokenType.STRING: lambda self, token: self.expression( 532 exp.Literal, this=token.text, is_string=True 533 ), 534 TokenType.NUMBER: lambda self, token: self.expression( 535 exp.Literal, this=token.text, is_string=False 536 ), 537 TokenType.STAR: lambda self, _: self.expression( 538 exp.Star, 539 **{"except": self._parse_except(), "replace": self._parse_replace()}, 540 ), 541 TokenType.NULL: lambda self, _: self.expression(exp.Null), 542 TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True), 543 TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False), 544 TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text), 545 TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text), 546 TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text), 547 TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token), 548 TokenType.NATIONAL_STRING: lambda self, token: self.expression( 549 exp.National, this=token.text 550 ), 551 TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text), 552 TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(), 553 } 554 555 PLACEHOLDER_PARSERS = { 
556 TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder), 557 TokenType.PARAMETER: lambda self: self._parse_parameter(), 558 TokenType.COLON: lambda self: self.expression(exp.Placeholder, this=self._prev.text) 559 if self._match_set((TokenType.NUMBER, TokenType.VAR)) 560 else None, 561 } 562 563 RANGE_PARSERS = { 564 TokenType.BETWEEN: lambda self, this: self._parse_between(this), 565 TokenType.GLOB: binary_range_parser(exp.Glob), 566 TokenType.ILIKE: binary_range_parser(exp.ILike), 567 TokenType.IN: lambda self, this: self._parse_in(this), 568 TokenType.IRLIKE: binary_range_parser(exp.RegexpILike), 569 TokenType.IS: lambda self, this: self._parse_is(this), 570 TokenType.LIKE: binary_range_parser(exp.Like), 571 TokenType.OVERLAPS: binary_range_parser(exp.Overlaps), 572 TokenType.RLIKE: binary_range_parser(exp.RegexpLike), 573 TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo), 574 } 575 576 PROPERTY_PARSERS: t.Dict[str, t.Callable] = { 577 "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty), 578 "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty), 579 "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(), 580 "CHARACTER SET": lambda self: self._parse_character_set(), 581 "CHECKSUM": lambda self: self._parse_checksum(), 582 "CLUSTER": lambda self: self._parse_cluster(), 583 "COLLATE": lambda self: self._parse_property_assignment(exp.CollateProperty), 584 "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty), 585 "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs), 586 "DEFINER": lambda self: self._parse_definer(), 587 "DETERMINISTIC": lambda self: self.expression( 588 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 589 ), 590 "DISTKEY": lambda self: self._parse_distkey(), 591 "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty), 592 "ENGINE": lambda self: 
self._parse_property_assignment(exp.EngineProperty), 593 "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty), 594 "EXTERNAL": lambda self: self.expression(exp.ExternalProperty), 595 "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs), 596 "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 597 "FREESPACE": lambda self: self._parse_freespace(), 598 "IMMUTABLE": lambda self: self.expression( 599 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 600 ), 601 "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs), 602 "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty), 603 "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"), 604 "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"), 605 "LIKE": lambda self: self._parse_create_like(), 606 "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty), 607 "LOCK": lambda self: self._parse_locking(), 608 "LOCKING": lambda self: self._parse_locking(), 609 "LOG": lambda self, **kwargs: self._parse_log(**kwargs), 610 "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty), 611 "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs), 612 "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True), 613 "NO": lambda self: self._parse_no_property(), 614 "ON": lambda self: self._parse_on_property(), 615 "ORDER BY": lambda self: self._parse_order(skip_order_token=True), 616 "PARTITION BY": lambda self: self._parse_partitioned_by(), 617 "PARTITIONED BY": lambda self: self._parse_partitioned_by(), 618 "PARTITIONED_BY": lambda self: self._parse_partitioned_by(), 619 "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True), 620 "RANGE": lambda self: self._parse_dict_range(this="RANGE"), 621 "RETURNS": lambda self: self._parse_returns(), 622 "ROW": lambda self: self._parse_row(), 623 "ROW_FORMAT": lambda self: 
self._parse_property_assignment(exp.RowFormatProperty), 624 "SET": lambda self: self.expression(exp.SetProperty, multi=False), 625 "SETTINGS": lambda self: self.expression( 626 exp.SettingsProperty, expressions=self._parse_csv(self._parse_set_item) 627 ), 628 "SORTKEY": lambda self: self._parse_sortkey(), 629 "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"), 630 "STABLE": lambda self: self.expression( 631 exp.StabilityProperty, this=exp.Literal.string("STABLE") 632 ), 633 "STORED": lambda self: self._parse_stored(), 634 "TBLPROPERTIES": lambda self: self._parse_wrapped_csv(self._parse_property), 635 "TEMP": lambda self: self.expression(exp.TemporaryProperty), 636 "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty), 637 "TO": lambda self: self._parse_to_table(), 638 "TRANSIENT": lambda self: self.expression(exp.TransientProperty), 639 "TTL": lambda self: self._parse_ttl(), 640 "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 641 "VOLATILE": lambda self: self._parse_volatile_property(), 642 "WITH": lambda self: self._parse_with_property(), 643 } 644 645 CONSTRAINT_PARSERS = { 646 "AUTOINCREMENT": lambda self: self._parse_auto_increment(), 647 "AUTO_INCREMENT": lambda self: self._parse_auto_increment(), 648 "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False), 649 "CHARACTER SET": lambda self: self.expression( 650 exp.CharacterSetColumnConstraint, this=self._parse_var_or_string() 651 ), 652 "CHECK": lambda self: self.expression( 653 exp.CheckColumnConstraint, this=self._parse_wrapped(self._parse_conjunction) 654 ), 655 "COLLATE": lambda self: self.expression( 656 exp.CollateColumnConstraint, this=self._parse_var() 657 ), 658 "COMMENT": lambda self: self.expression( 659 exp.CommentColumnConstraint, this=self._parse_string() 660 ), 661 "COMPRESS": lambda self: self._parse_compress(), 662 "DEFAULT": lambda self: self.expression( 663 exp.DefaultColumnConstraint, 
this=self._parse_bitwise() 664 ), 665 "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()), 666 "FOREIGN KEY": lambda self: self._parse_foreign_key(), 667 "FORMAT": lambda self: self.expression( 668 exp.DateFormatColumnConstraint, this=self._parse_var_or_string() 669 ), 670 "GENERATED": lambda self: self._parse_generated_as_identity(), 671 "IDENTITY": lambda self: self._parse_auto_increment(), 672 "INLINE": lambda self: self._parse_inline(), 673 "LIKE": lambda self: self._parse_create_like(), 674 "NOT": lambda self: self._parse_not_constraint(), 675 "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True), 676 "ON": lambda self: self._match(TokenType.UPDATE) 677 and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function()), 678 "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()), 679 "PRIMARY KEY": lambda self: self._parse_primary_key(), 680 "REFERENCES": lambda self: self._parse_references(match=False), 681 "TITLE": lambda self: self.expression( 682 exp.TitleColumnConstraint, this=self._parse_var_or_string() 683 ), 684 "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]), 685 "UNIQUE": lambda self: self._parse_unique(), 686 "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint), 687 } 688 689 ALTER_PARSERS = { 690 "ADD": lambda self: self._parse_alter_table_add(), 691 "ALTER": lambda self: self._parse_alter_table_alter(), 692 "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()), 693 "DROP": lambda self: self._parse_alter_table_drop(), 694 "RENAME": lambda self: self._parse_alter_table_rename(), 695 } 696 697 SCHEMA_UNNAMED_CONSTRAINTS = {"CHECK", "FOREIGN KEY", "LIKE", "PRIMARY KEY", "UNIQUE"} 698 699 NO_PAREN_FUNCTION_PARSERS = { 700 TokenType.ANY: lambda self: self.expression(exp.Any, this=self._parse_bitwise()), 701 TokenType.CASE: lambda self: self._parse_case(), 
702 TokenType.IF: lambda self: self._parse_if(), 703 TokenType.NEXT_VALUE_FOR: lambda self: self.expression( 704 exp.NextValueFor, 705 this=self._parse_column(), 706 order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order), 707 ), 708 } 709 710 FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"} 711 712 FUNCTION_PARSERS: t.Dict[str, t.Callable] = { 713 "CAST": lambda self: self._parse_cast(self.STRICT_CAST), 714 "CONCAT": lambda self: self._parse_concat(), 715 "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST), 716 "DECODE": lambda self: self._parse_decode(), 717 "EXTRACT": lambda self: self._parse_extract(), 718 "JSON_OBJECT": lambda self: self._parse_json_object(), 719 "LOG": lambda self: self._parse_logarithm(), 720 "MATCH": lambda self: self._parse_match_against(), 721 "OPENJSON": lambda self: self._parse_open_json(), 722 "POSITION": lambda self: self._parse_position(), 723 "SAFE_CAST": lambda self: self._parse_cast(False), 724 "STRING_AGG": lambda self: self._parse_string_agg(), 725 "SUBSTRING": lambda self: self._parse_substring(), 726 "TRIM": lambda self: self._parse_trim(), 727 "TRY_CAST": lambda self: self._parse_cast(False), 728 "TRY_CONVERT": lambda self: self._parse_convert(False), 729 } 730 731 QUERY_MODIFIER_PARSERS = { 732 "joins": lambda self: list(iter(self._parse_join, None)), 733 "laterals": lambda self: list(iter(self._parse_lateral, None)), 734 "match": lambda self: self._parse_match_recognize(), 735 "where": lambda self: self._parse_where(), 736 "group": lambda self: self._parse_group(), 737 "having": lambda self: self._parse_having(), 738 "qualify": lambda self: self._parse_qualify(), 739 "windows": lambda self: self._parse_window_clause(), 740 "order": lambda self: self._parse_order(), 741 "limit": lambda self: self._parse_limit(), 742 "offset": lambda self: self._parse_offset(), 743 "locks": lambda self: self._parse_locks(), 744 "sample": lambda self: self._parse_table_sample(as_modifier=True), 745 } 746 747 SET_PARSERS 
= { 748 "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"), 749 "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"), 750 "SESSION": lambda self: self._parse_set_item_assignment("SESSION"), 751 "TRANSACTION": lambda self: self._parse_set_transaction(), 752 } 753 754 SHOW_PARSERS: t.Dict[str, t.Callable] = {} 755 756 TYPE_LITERAL_PARSERS: t.Dict[exp.DataType.Type, t.Callable] = {} 757 758 MODIFIABLES = (exp.Subquery, exp.Subqueryable, exp.Table) 759 760 DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN} 761 762 PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE} 763 764 TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"} 765 TRANSACTION_CHARACTERISTICS = { 766 "ISOLATION LEVEL REPEATABLE READ", 767 "ISOLATION LEVEL READ COMMITTED", 768 "ISOLATION LEVEL READ UNCOMMITTED", 769 "ISOLATION LEVEL SERIALIZABLE", 770 "READ WRITE", 771 "READ ONLY", 772 } 773 774 INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"} 775 776 CLONE_KINDS = {"TIMESTAMP", "OFFSET", "STATEMENT"} 777 778 WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS} 779 WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER} 780 WINDOW_SIDES = {"FOLLOWING", "PRECEDING"} 781 782 ADD_CONSTRAINT_TOKENS = {TokenType.CONSTRAINT, TokenType.PRIMARY_KEY, TokenType.FOREIGN_KEY} 783 784 STRICT_CAST = True 785 786 CONCAT_NULL_OUTPUTS_STRING = False # A NULL arg in CONCAT yields NULL by default 787 788 CONVERT_TYPE_FIRST = False 789 790 PREFIXED_PIVOT_COLUMNS = False 791 IDENTIFY_PIVOT_STRINGS = False 792 793 LOG_BASE_FIRST = True 794 LOG_DEFAULTS_TO_LN = False 795 796 __slots__ = ( 797 "error_level", 798 "error_message_context", 799 "max_errors", 800 "sql", 801 "errors", 802 "_tokens", 803 "_index", 804 "_curr", 805 "_next", 806 "_prev", 807 "_prev_comments", 808 ) 809 810 # Autofilled 811 INDEX_OFFSET: int = 0 812 UNNEST_COLUMN_ONLY: bool = False 813 ALIAS_POST_TABLESAMPLE: bool = False 814 STRICT_STRING_CONCAT = False 815 
NULL_ORDERING: str = "nulls_are_small" 816 SHOW_TRIE: t.Dict = {} 817 SET_TRIE: t.Dict = {} 818 FORMAT_MAPPING: t.Dict[str, str] = {} 819 FORMAT_TRIE: t.Dict = {} 820 TIME_MAPPING: t.Dict[str, str] = {} 821 TIME_TRIE: t.Dict = {} 822 823 def __init__( 824 self, 825 error_level: t.Optional[ErrorLevel] = None, 826 error_message_context: int = 100, 827 max_errors: int = 3, 828 ): 829 self.error_level = error_level or ErrorLevel.IMMEDIATE 830 self.error_message_context = error_message_context 831 self.max_errors = max_errors 832 self.reset() 833 834 def reset(self): 835 self.sql = "" 836 self.errors = [] 837 self._tokens = [] 838 self._index = 0 839 self._curr = None 840 self._next = None 841 self._prev = None 842 self._prev_comments = None 843 844 def parse( 845 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 846 ) -> t.List[t.Optional[exp.Expression]]: 847 """ 848 Parses a list of tokens and returns a list of syntax trees, one tree 849 per parsed SQL statement. 850 851 Args: 852 raw_tokens: The list of tokens. 853 sql: The original SQL string, used to produce helpful debug messages. 854 855 Returns: 856 The list of the produced syntax trees. 857 """ 858 return self._parse( 859 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 860 ) 861 862 def parse_into( 863 self, 864 expression_types: exp.IntoType, 865 raw_tokens: t.List[Token], 866 sql: t.Optional[str] = None, 867 ) -> t.List[t.Optional[exp.Expression]]: 868 """ 869 Parses a list of tokens into a given Expression type. If a collection of Expression 870 types is given instead, this method will try to parse the token list into each one 871 of them, stopping at the first for which the parsing succeeds. 872 873 Args: 874 expression_types: The expression type(s) to try and parse the token list into. 875 raw_tokens: The list of tokens. 876 sql: The original SQL string, used to produce helpful debug messages. 877 878 Returns: 879 The target Expression. 
880 """ 881 errors = [] 882 for expression_type in ensure_list(expression_types): 883 parser = self.EXPRESSION_PARSERS.get(expression_type) 884 if not parser: 885 raise TypeError(f"No parser registered for {expression_type}") 886 887 try: 888 return self._parse(parser, raw_tokens, sql) 889 except ParseError as e: 890 e.errors[0]["into_expression"] = expression_type 891 errors.append(e) 892 893 raise ParseError( 894 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 895 errors=merge_errors(errors), 896 ) from errors[-1] 897 898 def _parse( 899 self, 900 parse_method: t.Callable[[Parser], t.Optional[exp.Expression]], 901 raw_tokens: t.List[Token], 902 sql: t.Optional[str] = None, 903 ) -> t.List[t.Optional[exp.Expression]]: 904 self.reset() 905 self.sql = sql or "" 906 907 total = len(raw_tokens) 908 chunks: t.List[t.List[Token]] = [[]] 909 910 for i, token in enumerate(raw_tokens): 911 if token.token_type == TokenType.SEMICOLON: 912 if i < total - 1: 913 chunks.append([]) 914 else: 915 chunks[-1].append(token) 916 917 expressions = [] 918 919 for tokens in chunks: 920 self._index = -1 921 self._tokens = tokens 922 self._advance() 923 924 expressions.append(parse_method(self)) 925 926 if self._index < len(self._tokens): 927 self.raise_error("Invalid expression / Unexpected token") 928 929 self.check_errors() 930 931 return expressions 932 933 def check_errors(self) -> None: 934 """Logs or raises any found errors, depending on the chosen error level setting.""" 935 if self.error_level == ErrorLevel.WARN: 936 for error in self.errors: 937 logger.error(str(error)) 938 elif self.error_level == ErrorLevel.RAISE and self.errors: 939 raise ParseError( 940 concat_messages(self.errors, self.max_errors), 941 errors=merge_errors(self.errors), 942 ) 943 944 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 945 """ 946 Appends an error in the list of recorded errors or raises it, depending on the chosen 947 error level setting. 
948 """ 949 token = token or self._curr or self._prev or Token.string("") 950 start = token.start 951 end = token.end + 1 952 start_context = self.sql[max(start - self.error_message_context, 0) : start] 953 highlight = self.sql[start:end] 954 end_context = self.sql[end : end + self.error_message_context] 955 956 error = ParseError.new( 957 f"{message}. Line {token.line}, Col: {token.col}.\n" 958 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 959 description=message, 960 line=token.line, 961 col=token.col, 962 start_context=start_context, 963 highlight=highlight, 964 end_context=end_context, 965 ) 966 967 if self.error_level == ErrorLevel.IMMEDIATE: 968 raise error 969 970 self.errors.append(error) 971 972 def expression( 973 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 974 ) -> E: 975 """ 976 Creates a new, validated Expression. 977 978 Args: 979 exp_class: The expression class to instantiate. 980 comments: An optional list of comments to attach to the expression. 981 kwargs: The arguments to set for the expression along with their respective values. 982 983 Returns: 984 The target expression. 985 """ 986 instance = exp_class(**kwargs) 987 instance.add_comments(comments) if comments else self._add_comments(instance) 988 return self.validate_expression(instance) 989 990 def _add_comments(self, expression: t.Optional[exp.Expression]) -> None: 991 if expression and self._prev_comments: 992 expression.add_comments(self._prev_comments) 993 self._prev_comments = None 994 995 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 996 """ 997 Validates an Expression, making sure that all its mandatory arguments are set. 998 999 Args: 1000 expression: The expression to validate. 1001 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1002 1003 Returns: 1004 The validated expression. 
        """
        if self.error_level != ErrorLevel.IGNORE:
            for error_message in expression.error_messages(args):
                self.raise_error(error_message)

        return expression

    def _find_sql(self, start: Token, end: Token) -> str:
        """Return the slice of the original SQL text spanned by `start` and `end` (inclusive)."""
        return self.sql[start.start : end.end + 1]

    def _advance(self, times: int = 1) -> None:
        """Move the token cursor forward `times` tokens, refreshing _curr/_next/_prev.

        Also latches the previous token's comments into _prev_comments so they can
        be attached to the next expression built via self.expression().
        """
        self._index += times
        self._curr = seq_get(self._tokens, self._index)
        self._next = seq_get(self._tokens, self._index + 1)

        if self._index > 0:
            self._prev = self._tokens[self._index - 1]
            self._prev_comments = self._prev.comments
        else:
            # At (or before) the first token there is no previous token to expose.
            self._prev = None
            self._prev_comments = None

    def _retreat(self, index: int) -> None:
        """Rewind (or fast-forward) the cursor to the absolute token position `index`."""
        if index != self._index:
            # Delegate to _advance so all cursor bookkeeping stays in one place.
            self._advance(index - self._index)

    def _parse_command(self) -> exp.Command:
        """Fallback parse: wrap the previous token plus the remaining text as a raw Command node."""
        return self.expression(exp.Command, this=self._prev.text, expression=self._parse_string())

    def _parse_comment(self, allow_exists: bool = True) -> exp.Expression:
        """Parse a COMMENT [IF EXISTS] ON <kind> <target> IS '<text>' statement.

        Args:
            allow_exists: Whether an IF EXISTS clause may follow the COMMENT keyword.
        """
        start = self._prev
        exists = self._parse_exists() if allow_exists else None

        self._match(TokenType.ON)

        kind = self._match_set(self.CREATABLES) and self._prev
        if not kind:
            # Unrecognized target kind: degrade gracefully to an opaque Command.
            return self._parse_as_command(start)

        if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=kind.token_type)
        elif kind.token_type == TokenType.TABLE:
            this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS)
        elif kind.token_type == TokenType.COLUMN:
            this = self._parse_column()
        else:
            this = self._parse_id_var()

        self._match(TokenType.IS)

        return self.expression(
            exp.Comment, this=this, kind=kind.text, expression=self._parse_string(), exists=exists
        )

    def _parse_to_table(
        self,
    ) -> exp.ToTableProperty:
        # Parses a table reference for a TO <table> property
        # (presumably for materialized-view TO clauses — confirm with callers).
        table = self._parse_table_parts(schema=True)
        return self.expression(exp.ToTableProperty,
this=table) 1064 1065 # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl 1066 def _parse_ttl(self) -> exp.Expression: 1067 def _parse_ttl_action() -> t.Optional[exp.Expression]: 1068 this = self._parse_bitwise() 1069 1070 if self._match_text_seq("DELETE"): 1071 return self.expression(exp.MergeTreeTTLAction, this=this, delete=True) 1072 if self._match_text_seq("RECOMPRESS"): 1073 return self.expression( 1074 exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise() 1075 ) 1076 if self._match_text_seq("TO", "DISK"): 1077 return self.expression( 1078 exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string() 1079 ) 1080 if self._match_text_seq("TO", "VOLUME"): 1081 return self.expression( 1082 exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string() 1083 ) 1084 1085 return this 1086 1087 expressions = self._parse_csv(_parse_ttl_action) 1088 where = self._parse_where() 1089 group = self._parse_group() 1090 1091 aggregates = None 1092 if group and self._match(TokenType.SET): 1093 aggregates = self._parse_csv(self._parse_set_item) 1094 1095 return self.expression( 1096 exp.MergeTreeTTL, 1097 expressions=expressions, 1098 where=where, 1099 group=group, 1100 aggregates=aggregates, 1101 ) 1102 1103 def _parse_statement(self) -> t.Optional[exp.Expression]: 1104 if self._curr is None: 1105 return None 1106 1107 if self._match_set(self.STATEMENT_PARSERS): 1108 return self.STATEMENT_PARSERS[self._prev.token_type](self) 1109 1110 if self._match_set(Tokenizer.COMMANDS): 1111 return self._parse_command() 1112 1113 expression = self._parse_expression() 1114 expression = self._parse_set_operations(expression) if expression else self._parse_select() 1115 return self._parse_query_modifiers(expression) 1116 1117 def _parse_drop(self) -> exp.Drop | exp.Command: 1118 start = self._prev 1119 temporary = self._match(TokenType.TEMPORARY) 1120 materialized = self._match_text_seq("MATERIALIZED") 1121 1122 kind = 
self._match_set(self.CREATABLES) and self._prev.text 1123 if not kind: 1124 return self._parse_as_command(start) 1125 1126 return self.expression( 1127 exp.Drop, 1128 exists=self._parse_exists(), 1129 this=self._parse_table(schema=True), 1130 kind=kind, 1131 temporary=temporary, 1132 materialized=materialized, 1133 cascade=self._match_text_seq("CASCADE"), 1134 constraints=self._match_text_seq("CONSTRAINTS"), 1135 purge=self._match_text_seq("PURGE"), 1136 ) 1137 1138 def _parse_exists(self, not_: bool = False) -> t.Optional[bool]: 1139 return ( 1140 self._match(TokenType.IF) 1141 and (not not_ or self._match(TokenType.NOT)) 1142 and self._match(TokenType.EXISTS) 1143 ) 1144 1145 def _parse_create(self) -> exp.Create | exp.Command: 1146 # Note: this can't be None because we've matched a statement parser 1147 start = self._prev 1148 replace = start.text.upper() == "REPLACE" or self._match_pair( 1149 TokenType.OR, TokenType.REPLACE 1150 ) 1151 unique = self._match(TokenType.UNIQUE) 1152 1153 if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False): 1154 self._advance() 1155 1156 properties = None 1157 create_token = self._match_set(self.CREATABLES) and self._prev 1158 1159 if not create_token: 1160 # exp.Properties.Location.POST_CREATE 1161 properties = self._parse_properties() 1162 create_token = self._match_set(self.CREATABLES) and self._prev 1163 1164 if not properties or not create_token: 1165 return self._parse_as_command(start) 1166 1167 exists = self._parse_exists(not_=True) 1168 this = None 1169 expression = None 1170 indexes = None 1171 no_schema_binding = None 1172 begin = None 1173 clone = None 1174 1175 def extend_props(temp_props: t.Optional[exp.Properties]) -> None: 1176 nonlocal properties 1177 if properties and temp_props: 1178 properties.expressions.extend(temp_props.expressions) 1179 elif temp_props: 1180 properties = temp_props 1181 1182 if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1183 this = 
self._parse_user_defined_function(kind=create_token.token_type)

            # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature)
            extend_props(self._parse_properties())

            self._match(TokenType.ALIAS)
            begin = self._match(TokenType.BEGIN)
            return_ = self._match_text_seq("RETURN")
            expression = self._parse_statement()

            if return_:
                # A RETURN <stmt> body is wrapped in an explicit Return node.
                expression = self.expression(exp.Return, this=expression)
        elif create_token.token_type == TokenType.INDEX:
            this = self._parse_index(index=self._parse_id_var())
        elif create_token.token_type in self.DB_CREATABLES:
            table_parts = self._parse_table_parts(schema=True)

            # exp.Properties.Location.POST_NAME
            self._match(TokenType.COMMA)
            extend_props(self._parse_properties(before=True))

            this = self._parse_schema(this=table_parts)

            # exp.Properties.Location.POST_SCHEMA and POST_WITH
            extend_props(self._parse_properties())

            self._match(TokenType.ALIAS)
            if not self._match_set(self.DDL_SELECT_TOKENS, advance=False):
                # exp.Properties.Location.POST_ALIAS
                extend_props(self._parse_properties())

            expression = self._parse_ddl_select()

            if create_token.token_type == TokenType.TABLE:
                indexes = []
                while True:
                    # Indexes and post-expression properties may interleave after the
                    # SELECT body; keep collecting until no further index is found.
                    index = self._parse_index()

                    # exp.Properties.Location.POST_EXPRESSION and POST_INDEX
                    extend_props(self._parse_properties())

                    if not index:
                        break
                    else:
                        self._match(TokenType.COMMA)
                        indexes.append(index)
            elif create_token.token_type == TokenType.VIEW:
                if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"):
                    no_schema_binding = True

        if self._match_text_seq("CLONE"):
            # CLONE <table> [AT|BEFORE (<kind> => <expr>)]
            # NOTE(review): looks like Snowflake's zero-copy clone syntax — confirm.
            clone = self._parse_table(schema=True)
            when = self._match_texts({"AT", "BEFORE"}) and self._prev.text.upper()
            clone_kind = (
                self._match(TokenType.L_PAREN)
                and self._match_texts(self.CLONE_KINDS)
                and
self._prev.text.upper() 1240 ) 1241 clone_expression = self._match(TokenType.FARROW) and self._parse_bitwise() 1242 self._match(TokenType.R_PAREN) 1243 clone = self.expression( 1244 exp.Clone, this=clone, when=when, kind=clone_kind, expression=clone_expression 1245 ) 1246 1247 return self.expression( 1248 exp.Create, 1249 this=this, 1250 kind=create_token.text, 1251 replace=replace, 1252 unique=unique, 1253 expression=expression, 1254 exists=exists, 1255 properties=properties, 1256 indexes=indexes, 1257 no_schema_binding=no_schema_binding, 1258 begin=begin, 1259 clone=clone, 1260 ) 1261 1262 def _parse_property_before(self) -> t.Optional[exp.Expression]: 1263 # only used for teradata currently 1264 self._match(TokenType.COMMA) 1265 1266 kwargs = { 1267 "no": self._match_text_seq("NO"), 1268 "dual": self._match_text_seq("DUAL"), 1269 "before": self._match_text_seq("BEFORE"), 1270 "default": self._match_text_seq("DEFAULT"), 1271 "local": (self._match_text_seq("LOCAL") and "LOCAL") 1272 or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"), 1273 "after": self._match_text_seq("AFTER"), 1274 "minimum": self._match_texts(("MIN", "MINIMUM")), 1275 "maximum": self._match_texts(("MAX", "MAXIMUM")), 1276 } 1277 1278 if self._match_texts(self.PROPERTY_PARSERS): 1279 parser = self.PROPERTY_PARSERS[self._prev.text.upper()] 1280 try: 1281 return parser(self, **{k: v for k, v in kwargs.items() if v}) 1282 except TypeError: 1283 self.raise_error(f"Cannot parse property '{self._prev.text}'") 1284 1285 return None 1286 1287 def _parse_property(self) -> t.Optional[exp.Expression]: 1288 if self._match_texts(self.PROPERTY_PARSERS): 1289 return self.PROPERTY_PARSERS[self._prev.text.upper()](self) 1290 1291 if self._match_pair(TokenType.DEFAULT, TokenType.CHARACTER_SET): 1292 return self._parse_character_set(default=True) 1293 1294 if self._match_text_seq("COMPOUND", "SORTKEY"): 1295 return self._parse_sortkey(compound=True) 1296 1297 if self._match_text_seq("SQL", "SECURITY"): 1298 
return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER")) 1299 1300 assignment = self._match_pair( 1301 TokenType.VAR, TokenType.EQ, advance=False 1302 ) or self._match_pair(TokenType.STRING, TokenType.EQ, advance=False) 1303 1304 if assignment: 1305 key = self._parse_var_or_string() 1306 self._match(TokenType.EQ) 1307 return self.expression(exp.Property, this=key, value=self._parse_column()) 1308 1309 return None 1310 1311 def _parse_stored(self) -> exp.FileFormatProperty: 1312 self._match(TokenType.ALIAS) 1313 1314 input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None 1315 output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None 1316 1317 return self.expression( 1318 exp.FileFormatProperty, 1319 this=self.expression( 1320 exp.InputOutputFormat, input_format=input_format, output_format=output_format 1321 ) 1322 if input_format or output_format 1323 else self._parse_var_or_string() or self._parse_number() or self._parse_id_var(), 1324 ) 1325 1326 def _parse_property_assignment(self, exp_class: t.Type[E]) -> E: 1327 self._match(TokenType.EQ) 1328 self._match(TokenType.ALIAS) 1329 return self.expression(exp_class, this=self._parse_field()) 1330 1331 def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]: 1332 properties = [] 1333 while True: 1334 if before: 1335 prop = self._parse_property_before() 1336 else: 1337 prop = self._parse_property() 1338 1339 if not prop: 1340 break 1341 for p in ensure_list(prop): 1342 properties.append(p) 1343 1344 if properties: 1345 return self.expression(exp.Properties, expressions=properties) 1346 1347 return None 1348 1349 def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty: 1350 return self.expression( 1351 exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION") 1352 ) 1353 1354 def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty: 1355 if 
self._index >= 2:
            pre_volatile_token = self._tokens[self._index - 2]
        else:
            pre_volatile_token = None

        if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS:
            # VOLATILE preceded by one of PRE_VOLATILE_TOKENS reads as a table property.
            return exp.VolatileProperty()

        # Otherwise treat VOLATILE as a function-stability marker.
        return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE"))

    def _parse_with_property(
        self,
    ) -> t.Optional[exp.Expression] | t.List[t.Optional[exp.Expression]]:
        """Parse the WITH <property> forms: a wrapped list, JOURNAL, [NO] DATA, or isolated loading."""
        self._match(TokenType.WITH)
        if self._match(TokenType.L_PAREN, advance=False):
            # WITH (prop, ...) — parenthesized, comma-separated property list.
            return self._parse_wrapped_csv(self._parse_property)

        if self._match_text_seq("JOURNAL"):
            return self._parse_withjournaltable()

        if self._match_text_seq("DATA"):
            return self._parse_withdata(no=False)
        elif self._match_text_seq("NO", "DATA"):
            return self._parse_withdata(no=True)

        if not self._next:
            # Nothing follows WITH; give up rather than misparse.
            return None

        return self._parse_withisolatedloading()

    # https://dev.mysql.com/doc/refman/8.0/en/create-view.html
    def _parse_definer(self) -> t.Optional[exp.DefinerProperty]:
        """Parse DEFINER = user@host; returns None when either side is missing."""
        self._match(TokenType.EQ)

        user = self._parse_id_var()
        self._match(TokenType.PARAMETER)
        # The host may also be a bare % wildcard (MOD token).
        host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text)

        if not user or not host:
            return None

        return exp.DefinerProperty(this=f"{user}@{host}")

    def _parse_withjournaltable(self) -> exp.WithJournalTableProperty:
        """Parse the TABLE = <table> tail of WITH JOURNAL TABLE (presumably Teradata — confirm)."""
        self._match(TokenType.TABLE)
        self._match(TokenType.EQ)
        return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts())

    def _parse_log(self, no: bool = False) -> exp.LogProperty:
        """Build a [NO] LOG property; the keywords themselves were consumed by the caller."""
        return self.expression(exp.LogProperty, no=no)

    def _parse_journal(self, **kwargs) -> exp.JournalProperty:
        """Build a JOURNAL property from modifier flags collected by the caller."""
        return self.expression(exp.JournalProperty, **kwargs)

    def _parse_checksum(self) -> exp.ChecksumProperty:
        """Parse CHECKSUM = ON | OFF | DEFAULT."""
        self._match(TokenType.EQ)

        on = None
1413 if self._match(TokenType.ON): 1414 on = True 1415 elif self._match_text_seq("OFF"): 1416 on = False 1417 1418 return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT)) 1419 1420 def _parse_cluster(self) -> t.Optional[exp.Cluster]: 1421 if not self._match_text_seq("BY"): 1422 self._retreat(self._index - 1) 1423 return None 1424 1425 return self.expression(exp.Cluster, expressions=self._parse_csv(self._parse_ordered)) 1426 1427 def _parse_freespace(self) -> exp.FreespaceProperty: 1428 self._match(TokenType.EQ) 1429 return self.expression( 1430 exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT) 1431 ) 1432 1433 def _parse_mergeblockratio( 1434 self, no: bool = False, default: bool = False 1435 ) -> exp.MergeBlockRatioProperty: 1436 if self._match(TokenType.EQ): 1437 return self.expression( 1438 exp.MergeBlockRatioProperty, 1439 this=self._parse_number(), 1440 percent=self._match(TokenType.PERCENT), 1441 ) 1442 1443 return self.expression(exp.MergeBlockRatioProperty, no=no, default=default) 1444 1445 def _parse_datablocksize( 1446 self, 1447 default: t.Optional[bool] = None, 1448 minimum: t.Optional[bool] = None, 1449 maximum: t.Optional[bool] = None, 1450 ) -> exp.DataBlocksizeProperty: 1451 self._match(TokenType.EQ) 1452 size = self._parse_number() 1453 1454 units = None 1455 if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")): 1456 units = self._prev.text 1457 1458 return self.expression( 1459 exp.DataBlocksizeProperty, 1460 size=size, 1461 units=units, 1462 default=default, 1463 minimum=minimum, 1464 maximum=maximum, 1465 ) 1466 1467 def _parse_blockcompression(self) -> exp.BlockCompressionProperty: 1468 self._match(TokenType.EQ) 1469 always = self._match_text_seq("ALWAYS") 1470 manual = self._match_text_seq("MANUAL") 1471 never = self._match_text_seq("NEVER") 1472 default = self._match_text_seq("DEFAULT") 1473 1474 autotemp = None 1475 if self._match_text_seq("AUTOTEMP"): 1476 autotemp 
= self._parse_schema() 1477 1478 return self.expression( 1479 exp.BlockCompressionProperty, 1480 always=always, 1481 manual=manual, 1482 never=never, 1483 default=default, 1484 autotemp=autotemp, 1485 ) 1486 1487 def _parse_withisolatedloading(self) -> exp.IsolatedLoadingProperty: 1488 no = self._match_text_seq("NO") 1489 concurrent = self._match_text_seq("CONCURRENT") 1490 self._match_text_seq("ISOLATED", "LOADING") 1491 for_all = self._match_text_seq("FOR", "ALL") 1492 for_insert = self._match_text_seq("FOR", "INSERT") 1493 for_none = self._match_text_seq("FOR", "NONE") 1494 return self.expression( 1495 exp.IsolatedLoadingProperty, 1496 no=no, 1497 concurrent=concurrent, 1498 for_all=for_all, 1499 for_insert=for_insert, 1500 for_none=for_none, 1501 ) 1502 1503 def _parse_locking(self) -> exp.LockingProperty: 1504 if self._match(TokenType.TABLE): 1505 kind = "TABLE" 1506 elif self._match(TokenType.VIEW): 1507 kind = "VIEW" 1508 elif self._match(TokenType.ROW): 1509 kind = "ROW" 1510 elif self._match_text_seq("DATABASE"): 1511 kind = "DATABASE" 1512 else: 1513 kind = None 1514 1515 if kind in ("DATABASE", "TABLE", "VIEW"): 1516 this = self._parse_table_parts() 1517 else: 1518 this = None 1519 1520 if self._match(TokenType.FOR): 1521 for_or_in = "FOR" 1522 elif self._match(TokenType.IN): 1523 for_or_in = "IN" 1524 else: 1525 for_or_in = None 1526 1527 if self._match_text_seq("ACCESS"): 1528 lock_type = "ACCESS" 1529 elif self._match_texts(("EXCL", "EXCLUSIVE")): 1530 lock_type = "EXCLUSIVE" 1531 elif self._match_text_seq("SHARE"): 1532 lock_type = "SHARE" 1533 elif self._match_text_seq("READ"): 1534 lock_type = "READ" 1535 elif self._match_text_seq("WRITE"): 1536 lock_type = "WRITE" 1537 elif self._match_text_seq("CHECKSUM"): 1538 lock_type = "CHECKSUM" 1539 else: 1540 lock_type = None 1541 1542 override = self._match_text_seq("OVERRIDE") 1543 1544 return self.expression( 1545 exp.LockingProperty, 1546 this=this, 1547 kind=kind, 1548 for_or_in=for_or_in, 1549 
            lock_type=lock_type,
            override=override,
        )

    def _parse_partition_by(self) -> t.List[t.Optional[exp.Expression]]:
        """Parse PARTITION BY <expr, ...>; returns an empty list when the clause is absent."""
        if self._match(TokenType.PARTITION_BY):
            return self._parse_csv(self._parse_conjunction)
        return []

    def _parse_partitioned_by(self) -> exp.PartitionedByProperty:
        """Parse PARTITIONED BY [=] followed by either a column schema or a bracketed field."""
        self._match(TokenType.EQ)
        return self.expression(
            exp.PartitionedByProperty,
            this=self._parse_schema() or self._parse_bracket(self._parse_field()),
        )

    def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty:
        """Parse the optional [AND [NO] STATISTICS] tail of a WITH [NO] DATA clause."""
        if self._match_text_seq("AND", "STATISTICS"):
            statistics = True
        elif self._match_text_seq("AND", "NO", "STATISTICS"):
            statistics = False
        else:
            # Statistics not mentioned at all.
            statistics = None

        return self.expression(exp.WithDataProperty, no=no, statistics=statistics)

    def _parse_no_property(self) -> t.Optional[exp.NoPrimaryIndexProperty]:
        """After a NO keyword, recognize only PRIMARY INDEX; anything else yields None."""
        if self._match_text_seq("PRIMARY", "INDEX"):
            return exp.NoPrimaryIndexProperty()
        return None

    def _parse_on_property(self) -> t.Optional[exp.Expression]:
        """After an ON keyword, parse COMMIT PRESERVE ROWS or COMMIT DELETE ROWS."""
        if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"):
            return exp.OnCommitProperty()
        elif self._match_text_seq("COMMIT", "DELETE", "ROWS"):
            return exp.OnCommitProperty(delete=True)
        return None

    def _parse_distkey(self) -> exp.DistKeyProperty:
        """Parse DISTKEY(<identifier>)."""
        return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var))

    def _parse_create_like(self) -> t.Optional[exp.LikeProperty]:
        """Parse LIKE <table> [INCLUDING|EXCLUDING <option> ...] for CREATE TABLE ... LIKE."""
        table = self._parse_table(schema=True)

        options = []
        while self._match_texts(("INCLUDING", "EXCLUDING")):
            this = self._prev.text.upper()

            id_var = self._parse_id_var()
            if not id_var:
                # Dangling INCLUDING/EXCLUDING with no option name: treat as a failed parse.
                return None

            options.append(
                self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper()))
            )

        return self.expression(exp.LikeProperty, this=table, expressions=options)

def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty: 1608 return self.expression( 1609 exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound 1610 ) 1611 1612 def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty: 1613 self._match(TokenType.EQ) 1614 return self.expression( 1615 exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default 1616 ) 1617 1618 def _parse_returns(self) -> exp.ReturnsProperty: 1619 value: t.Optional[exp.Expression] 1620 is_table = self._match(TokenType.TABLE) 1621 1622 if is_table: 1623 if self._match(TokenType.LT): 1624 value = self.expression( 1625 exp.Schema, 1626 this="TABLE", 1627 expressions=self._parse_csv(self._parse_struct_types), 1628 ) 1629 if not self._match(TokenType.GT): 1630 self.raise_error("Expecting >") 1631 else: 1632 value = self._parse_schema(exp.var("TABLE")) 1633 else: 1634 value = self._parse_types() 1635 1636 return self.expression(exp.ReturnsProperty, this=value, is_table=is_table) 1637 1638 def _parse_describe(self) -> exp.Describe: 1639 kind = self._match_set(self.CREATABLES) and self._prev.text 1640 this = self._parse_table() 1641 return self.expression(exp.Describe, this=this, kind=kind) 1642 1643 def _parse_insert(self) -> exp.Insert: 1644 overwrite = self._match(TokenType.OVERWRITE) 1645 local = self._match_text_seq("LOCAL") 1646 alternative = None 1647 1648 if self._match_text_seq("DIRECTORY"): 1649 this: t.Optional[exp.Expression] = self.expression( 1650 exp.Directory, 1651 this=self._parse_var_or_string(), 1652 local=local, 1653 row_format=self._parse_row_format(match_row=True), 1654 ) 1655 else: 1656 if self._match(TokenType.OR): 1657 alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text 1658 1659 self._match(TokenType.INTO) 1660 self._match(TokenType.TABLE) 1661 this = self._parse_table(schema=True) 1662 1663 return self.expression( 1664 exp.Insert, 1665 this=this, 1666 exists=self._parse_exists(), 
1667 partition=self._parse_partition(), 1668 expression=self._parse_ddl_select(), 1669 conflict=self._parse_on_conflict(), 1670 returning=self._parse_returning(), 1671 overwrite=overwrite, 1672 alternative=alternative, 1673 ) 1674 1675 def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]: 1676 conflict = self._match_text_seq("ON", "CONFLICT") 1677 duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY") 1678 1679 if not conflict and not duplicate: 1680 return None 1681 1682 nothing = None 1683 expressions = None 1684 key = None 1685 constraint = None 1686 1687 if conflict: 1688 if self._match_text_seq("ON", "CONSTRAINT"): 1689 constraint = self._parse_id_var() 1690 else: 1691 key = self._parse_csv(self._parse_value) 1692 1693 self._match_text_seq("DO") 1694 if self._match_text_seq("NOTHING"): 1695 nothing = True 1696 else: 1697 self._match(TokenType.UPDATE) 1698 expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality) 1699 1700 return self.expression( 1701 exp.OnConflict, 1702 duplicate=duplicate, 1703 expressions=expressions, 1704 nothing=nothing, 1705 key=key, 1706 constraint=constraint, 1707 ) 1708 1709 def _parse_returning(self) -> t.Optional[exp.Returning]: 1710 if not self._match(TokenType.RETURNING): 1711 return None 1712 1713 return self.expression(exp.Returning, expressions=self._parse_csv(self._parse_column)) 1714 1715 def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 1716 if not self._match(TokenType.FORMAT): 1717 return None 1718 return self._parse_row_format() 1719 1720 def _parse_row_format( 1721 self, match_row: bool = False 1722 ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 1723 if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT): 1724 return None 1725 1726 if self._match_text_seq("SERDE"): 1727 return self.expression(exp.RowFormatSerdeProperty, this=self._parse_string()) 1728 1729 self._match_text_seq("DELIMITED") 1730 
1731 kwargs = {} 1732 1733 if self._match_text_seq("FIELDS", "TERMINATED", "BY"): 1734 kwargs["fields"] = self._parse_string() 1735 if self._match_text_seq("ESCAPED", "BY"): 1736 kwargs["escaped"] = self._parse_string() 1737 if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"): 1738 kwargs["collection_items"] = self._parse_string() 1739 if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"): 1740 kwargs["map_keys"] = self._parse_string() 1741 if self._match_text_seq("LINES", "TERMINATED", "BY"): 1742 kwargs["lines"] = self._parse_string() 1743 if self._match_text_seq("NULL", "DEFINED", "AS"): 1744 kwargs["null"] = self._parse_string() 1745 1746 return self.expression(exp.RowFormatDelimitedProperty, **kwargs) # type: ignore 1747 1748 def _parse_load(self) -> exp.LoadData | exp.Command: 1749 if self._match_text_seq("DATA"): 1750 local = self._match_text_seq("LOCAL") 1751 self._match_text_seq("INPATH") 1752 inpath = self._parse_string() 1753 overwrite = self._match(TokenType.OVERWRITE) 1754 self._match_pair(TokenType.INTO, TokenType.TABLE) 1755 1756 return self.expression( 1757 exp.LoadData, 1758 this=self._parse_table(schema=True), 1759 local=local, 1760 overwrite=overwrite, 1761 inpath=inpath, 1762 partition=self._parse_partition(), 1763 input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(), 1764 serde=self._match_text_seq("SERDE") and self._parse_string(), 1765 ) 1766 return self._parse_as_command(self._prev) 1767 1768 def _parse_delete(self) -> exp.Delete: 1769 self._match(TokenType.FROM) 1770 1771 return self.expression( 1772 exp.Delete, 1773 this=self._parse_table(), 1774 using=self._parse_csv(lambda: self._match(TokenType.USING) and self._parse_table()), 1775 where=self._parse_where(), 1776 returning=self._parse_returning(), 1777 ) 1778 1779 def _parse_update(self) -> exp.Update: 1780 return self.expression( 1781 exp.Update, 1782 **{ # type: ignore 1783 "this": self._parse_table(alias_tokens=self.UPDATE_ALIAS_TOKENS), 1784 
"expressions": self._match(TokenType.SET) and self._parse_csv(self._parse_equality), 1785 "from": self._parse_from(modifiers=True), 1786 "where": self._parse_where(), 1787 "returning": self._parse_returning(), 1788 }, 1789 ) 1790 1791 def _parse_uncache(self) -> exp.Uncache: 1792 if not self._match(TokenType.TABLE): 1793 self.raise_error("Expecting TABLE after UNCACHE") 1794 1795 return self.expression( 1796 exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True) 1797 ) 1798 1799 def _parse_cache(self) -> exp.Cache: 1800 lazy = self._match_text_seq("LAZY") 1801 self._match(TokenType.TABLE) 1802 table = self._parse_table(schema=True) 1803 1804 options = [] 1805 if self._match_text_seq("OPTIONS"): 1806 self._match_l_paren() 1807 k = self._parse_string() 1808 self._match(TokenType.EQ) 1809 v = self._parse_string() 1810 options = [k, v] 1811 self._match_r_paren() 1812 1813 self._match(TokenType.ALIAS) 1814 return self.expression( 1815 exp.Cache, 1816 this=table, 1817 lazy=lazy, 1818 options=options, 1819 expression=self._parse_select(nested=True), 1820 ) 1821 1822 def _parse_partition(self) -> t.Optional[exp.Partition]: 1823 if not self._match(TokenType.PARTITION): 1824 return None 1825 1826 return self.expression( 1827 exp.Partition, expressions=self._parse_wrapped_csv(self._parse_conjunction) 1828 ) 1829 1830 def _parse_value(self) -> exp.Tuple: 1831 if self._match(TokenType.L_PAREN): 1832 expressions = self._parse_csv(self._parse_conjunction) 1833 self._match_r_paren() 1834 return self.expression(exp.Tuple, expressions=expressions) 1835 1836 # In presto we can have VALUES 1, 2 which results in 1 column & 2 rows. 
1837 # Source: https://prestodb.io/docs/current/sql/values.html 1838 return self.expression(exp.Tuple, expressions=[self._parse_conjunction()]) 1839 1840 def _parse_select( 1841 self, nested: bool = False, table: bool = False, parse_subquery_alias: bool = True 1842 ) -> t.Optional[exp.Expression]: 1843 cte = self._parse_with() 1844 if cte: 1845 this = self._parse_statement() 1846 1847 if not this: 1848 self.raise_error("Failed to parse any statement following CTE") 1849 return cte 1850 1851 if "with" in this.arg_types: 1852 this.set("with", cte) 1853 else: 1854 self.raise_error(f"{this.key} does not support CTE") 1855 this = cte 1856 elif self._match(TokenType.SELECT): 1857 comments = self._prev_comments 1858 1859 hint = self._parse_hint() 1860 all_ = self._match(TokenType.ALL) 1861 distinct = self._match(TokenType.DISTINCT) 1862 1863 kind = ( 1864 self._match(TokenType.ALIAS) 1865 and self._match_texts(("STRUCT", "VALUE")) 1866 and self._prev.text 1867 ) 1868 1869 if distinct: 1870 distinct = self.expression( 1871 exp.Distinct, 1872 on=self._parse_value() if self._match(TokenType.ON) else None, 1873 ) 1874 1875 if all_ and distinct: 1876 self.raise_error("Cannot specify both ALL and DISTINCT after SELECT") 1877 1878 limit = self._parse_limit(top=True) 1879 expressions = self._parse_csv(self._parse_expression) 1880 1881 this = self.expression( 1882 exp.Select, 1883 kind=kind, 1884 hint=hint, 1885 distinct=distinct, 1886 expressions=expressions, 1887 limit=limit, 1888 ) 1889 this.comments = comments 1890 1891 into = self._parse_into() 1892 if into: 1893 this.set("into", into) 1894 1895 from_ = self._parse_from() 1896 if from_: 1897 this.set("from", from_) 1898 1899 this = self._parse_query_modifiers(this) 1900 elif (table or nested) and self._match(TokenType.L_PAREN): 1901 if self._match(TokenType.PIVOT): 1902 this = self._parse_simplified_pivot() 1903 elif self._match(TokenType.FROM): 1904 this = exp.select("*").from_( 1905 t.cast(exp.From, 
self._parse_from(skip_from_token=True)) 1906 ) 1907 else: 1908 this = self._parse_table() if table else self._parse_select(nested=True) 1909 this = self._parse_set_operations(self._parse_query_modifiers(this)) 1910 1911 self._match_r_paren() 1912 1913 # early return so that subquery unions aren't parsed again 1914 # SELECT * FROM (SELECT 1) UNION ALL SELECT 1 1915 # Union ALL should be a property of the top select node, not the subquery 1916 return self._parse_subquery(this, parse_alias=parse_subquery_alias) 1917 elif self._match(TokenType.VALUES): 1918 this = self.expression( 1919 exp.Values, 1920 expressions=self._parse_csv(self._parse_value), 1921 alias=self._parse_table_alias(), 1922 ) 1923 else: 1924 this = None 1925 1926 return self._parse_set_operations(this) 1927 1928 def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]: 1929 if not skip_with_token and not self._match(TokenType.WITH): 1930 return None 1931 1932 comments = self._prev_comments 1933 recursive = self._match(TokenType.RECURSIVE) 1934 1935 expressions = [] 1936 while True: 1937 expressions.append(self._parse_cte()) 1938 1939 if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH): 1940 break 1941 else: 1942 self._match(TokenType.WITH) 1943 1944 return self.expression( 1945 exp.With, comments=comments, expressions=expressions, recursive=recursive 1946 ) 1947 1948 def _parse_cte(self) -> exp.CTE: 1949 alias = self._parse_table_alias() 1950 if not alias or not alias.this: 1951 self.raise_error("Expected CTE to have alias") 1952 1953 self._match(TokenType.ALIAS) 1954 return self.expression( 1955 exp.CTE, this=self._parse_wrapped(self._parse_statement), alias=alias 1956 ) 1957 1958 def _parse_table_alias( 1959 self, alias_tokens: t.Optional[t.Collection[TokenType]] = None 1960 ) -> t.Optional[exp.TableAlias]: 1961 any_token = self._match(TokenType.ALIAS) 1962 alias = ( 1963 self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 
            or self._parse_string_as_identifier()
        )

        index = self._index
        if self._match(TokenType.L_PAREN):
            columns = self._parse_csv(self._parse_function_parameter)
            # Only commit to the parenthesized column list if it parsed; otherwise rewind.
            self._match_r_paren() if columns else self._retreat(index)
        else:
            columns = None

        if not alias and not columns:
            return None

        return self.expression(exp.TableAlias, this=alias, columns=columns)

    def _parse_subquery(
        self, this: t.Optional[exp.Expression], parse_alias: bool = True
    ) -> t.Optional[exp.Subquery]:
        """Wrap `this` in a Subquery node, parsing trailing pivots and (optionally) an alias.

        Returns None when there is no expression to wrap.
        """
        if not this:
            return None

        return self.expression(
            exp.Subquery,
            this=this,
            pivots=self._parse_pivots(),
            alias=self._parse_table_alias() if parse_alias else None,
        )

    def _parse_query_modifiers(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        """Attach each registered query modifier (QUERY_MODIFIER_PARSERS) to a modifiable node.

        Non-modifiable expressions (and None) are returned unchanged.
        """
        if isinstance(this, self.MODIFIABLES):
            for key, parser in self.QUERY_MODIFIER_PARSERS.items():
                expression = parser(self)

                if expression:
                    if key == "limit":
                        # If the limit carried an embedded offset, hoist it into its own node.
                        offset = expression.args.pop("offset", None)
                        if offset:
                            this.set("offset", exp.Offset(expression=offset))
                    this.set(key, expression)
        return this

    def _parse_hint(self) -> t.Optional[exp.Hint]:
        """Parse an optimizer hint; requires a closing */ after the hint function list."""
        if self._match(TokenType.HINT):
            hints = self._parse_csv(self._parse_function)

            if not self._match_pair(TokenType.STAR, TokenType.SLASH):
                self.raise_error("Expected */ after HINT")

            return self.expression(exp.Hint, expressions=hints)

        return None

    def _parse_into(self) -> t.Optional[exp.Into]:
        """Parse INTO [TEMPORARY | UNLOGGED] [TABLE] <table>."""
        if not self._match(TokenType.INTO):
            return None

        temp = self._match(TokenType.TEMPORARY)
        unlogged = self._match_text_seq("UNLOGGED")
        self._match(TokenType.TABLE)

        return self.expression(
            exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged
        )

    def _parse_from(
        self,
modifiers: bool = False, skip_from_token: bool = False 2032 ) -> t.Optional[exp.From]: 2033 if not skip_from_token and not self._match(TokenType.FROM): 2034 return None 2035 2036 comments = self._prev_comments 2037 this = self._parse_table() 2038 2039 return self.expression( 2040 exp.From, 2041 comments=comments, 2042 this=self._parse_query_modifiers(this) if modifiers else this, 2043 ) 2044 2045 def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]: 2046 if not self._match(TokenType.MATCH_RECOGNIZE): 2047 return None 2048 2049 self._match_l_paren() 2050 2051 partition = self._parse_partition_by() 2052 order = self._parse_order() 2053 measures = ( 2054 self._parse_csv(self._parse_expression) if self._match_text_seq("MEASURES") else None 2055 ) 2056 2057 if self._match_text_seq("ONE", "ROW", "PER", "MATCH"): 2058 rows = exp.var("ONE ROW PER MATCH") 2059 elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"): 2060 text = "ALL ROWS PER MATCH" 2061 if self._match_text_seq("SHOW", "EMPTY", "MATCHES"): 2062 text += f" SHOW EMPTY MATCHES" 2063 elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"): 2064 text += f" OMIT EMPTY MATCHES" 2065 elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"): 2066 text += f" WITH UNMATCHED ROWS" 2067 rows = exp.var(text) 2068 else: 2069 rows = None 2070 2071 if self._match_text_seq("AFTER", "MATCH", "SKIP"): 2072 text = "AFTER MATCH SKIP" 2073 if self._match_text_seq("PAST", "LAST", "ROW"): 2074 text += f" PAST LAST ROW" 2075 elif self._match_text_seq("TO", "NEXT", "ROW"): 2076 text += f" TO NEXT ROW" 2077 elif self._match_text_seq("TO", "FIRST"): 2078 text += f" TO FIRST {self._advance_any().text}" # type: ignore 2079 elif self._match_text_seq("TO", "LAST"): 2080 text += f" TO LAST {self._advance_any().text}" # type: ignore 2081 after = exp.var(text) 2082 else: 2083 after = None 2084 2085 if self._match_text_seq("PATTERN"): 2086 self._match_l_paren() 2087 2088 if not self._curr: 2089 self.raise_error("Expecting )", 
self._curr) 2090 2091 paren = 1 2092 start = self._curr 2093 2094 while self._curr and paren > 0: 2095 if self._curr.token_type == TokenType.L_PAREN: 2096 paren += 1 2097 if self._curr.token_type == TokenType.R_PAREN: 2098 paren -= 1 2099 2100 end = self._prev 2101 self._advance() 2102 2103 if paren > 0: 2104 self.raise_error("Expecting )", self._curr) 2105 2106 pattern = exp.var(self._find_sql(start, end)) 2107 else: 2108 pattern = None 2109 2110 define = ( 2111 self._parse_csv( 2112 lambda: self.expression( 2113 exp.Alias, 2114 alias=self._parse_id_var(any_token=True), 2115 this=self._match(TokenType.ALIAS) and self._parse_conjunction(), 2116 ) 2117 ) 2118 if self._match_text_seq("DEFINE") 2119 else None 2120 ) 2121 2122 self._match_r_paren() 2123 2124 return self.expression( 2125 exp.MatchRecognize, 2126 partition_by=partition, 2127 order=order, 2128 measures=measures, 2129 rows=rows, 2130 after=after, 2131 pattern=pattern, 2132 define=define, 2133 alias=self._parse_table_alias(), 2134 ) 2135 2136 def _parse_lateral(self) -> t.Optional[exp.Lateral]: 2137 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY) 2138 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY) 2139 2140 if outer_apply or cross_apply: 2141 this = self._parse_select(table=True) 2142 view = None 2143 outer = not cross_apply 2144 elif self._match(TokenType.LATERAL): 2145 this = self._parse_select(table=True) 2146 view = self._match(TokenType.VIEW) 2147 outer = self._match(TokenType.OUTER) 2148 else: 2149 return None 2150 2151 if not this: 2152 this = self._parse_function() or self._parse_id_var(any_token=False) 2153 while self._match(TokenType.DOT): 2154 this = exp.Dot( 2155 this=this, 2156 expression=self._parse_function() or self._parse_id_var(any_token=False), 2157 ) 2158 2159 if view: 2160 table = self._parse_id_var(any_token=False) 2161 columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else [] 2162 table_alias: t.Optional[exp.TableAlias] = 
self.expression( 2163 exp.TableAlias, this=table, columns=columns 2164 ) 2165 elif isinstance(this, exp.Subquery) and this.alias: 2166 # Ensures parity between the Subquery's and the Lateral's "alias" args 2167 table_alias = this.args["alias"].copy() 2168 else: 2169 table_alias = self._parse_table_alias() 2170 2171 return self.expression(exp.Lateral, this=this, view=view, outer=outer, alias=table_alias) 2172 2173 def _parse_join_parts( 2174 self, 2175 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 2176 return ( 2177 self._match_set(self.JOIN_METHODS) and self._prev, 2178 self._match_set(self.JOIN_SIDES) and self._prev, 2179 self._match_set(self.JOIN_KINDS) and self._prev, 2180 ) 2181 2182 def _parse_join(self, skip_join_token: bool = False) -> t.Optional[exp.Join]: 2183 if self._match(TokenType.COMMA): 2184 return self.expression(exp.Join, this=self._parse_table()) 2185 2186 index = self._index 2187 method, side, kind = self._parse_join_parts() 2188 hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None 2189 join = self._match(TokenType.JOIN) 2190 2191 if not skip_join_token and not join: 2192 self._retreat(index) 2193 kind = None 2194 method = None 2195 side = None 2196 2197 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False) 2198 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False) 2199 2200 if not skip_join_token and not join and not outer_apply and not cross_apply: 2201 return None 2202 2203 if outer_apply: 2204 side = Token(TokenType.LEFT, "LEFT") 2205 2206 kwargs: t.Dict[str, t.Any] = {"this": self._parse_table()} 2207 2208 if method: 2209 kwargs["method"] = method.text 2210 if side: 2211 kwargs["side"] = side.text 2212 if kind: 2213 kwargs["kind"] = kind.text 2214 if hint: 2215 kwargs["hint"] = hint 2216 2217 if self._match(TokenType.ON): 2218 kwargs["on"] = self._parse_conjunction() 2219 elif self._match(TokenType.USING): 2220 kwargs["using"] = self._parse_wrapped_id_vars() 2221 
2222 return self.expression(exp.Join, **kwargs) 2223 2224 def _parse_index( 2225 self, 2226 index: t.Optional[exp.Expression] = None, 2227 ) -> t.Optional[exp.Index]: 2228 if index: 2229 unique = None 2230 primary = None 2231 amp = None 2232 2233 self._match(TokenType.ON) 2234 self._match(TokenType.TABLE) # hive 2235 table = self._parse_table_parts(schema=True) 2236 else: 2237 unique = self._match(TokenType.UNIQUE) 2238 primary = self._match_text_seq("PRIMARY") 2239 amp = self._match_text_seq("AMP") 2240 2241 if not self._match(TokenType.INDEX): 2242 return None 2243 2244 index = self._parse_id_var() 2245 table = None 2246 2247 using = self._parse_field() if self._match(TokenType.USING) else None 2248 2249 if self._match(TokenType.L_PAREN, advance=False): 2250 columns = self._parse_wrapped_csv(self._parse_ordered) 2251 else: 2252 columns = None 2253 2254 return self.expression( 2255 exp.Index, 2256 this=index, 2257 table=table, 2258 using=using, 2259 columns=columns, 2260 unique=unique, 2261 primary=primary, 2262 amp=amp, 2263 partition_by=self._parse_partition_by(), 2264 ) 2265 2266 def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]: 2267 return ( 2268 (not schema and self._parse_function(optional_parens=False)) 2269 or self._parse_id_var(any_token=False) 2270 or self._parse_string_as_identifier() 2271 or self._parse_placeholder() 2272 ) 2273 2274 def _parse_table_parts(self, schema: bool = False) -> exp.Table: 2275 catalog = None 2276 db = None 2277 table = self._parse_table_part(schema=schema) 2278 2279 while self._match(TokenType.DOT): 2280 if catalog: 2281 # This allows nesting the table in arbitrarily many dot expressions if needed 2282 table = self.expression( 2283 exp.Dot, this=table, expression=self._parse_table_part(schema=schema) 2284 ) 2285 else: 2286 catalog = db 2287 db = table 2288 table = self._parse_table_part(schema=schema) 2289 2290 if not table: 2291 self.raise_error(f"Expected table name but got {self._curr}") 2292 
2293 return self.expression( 2294 exp.Table, this=table, db=db, catalog=catalog, pivots=self._parse_pivots() 2295 ) 2296 2297 def _parse_table( 2298 self, schema: bool = False, alias_tokens: t.Optional[t.Collection[TokenType]] = None 2299 ) -> t.Optional[exp.Expression]: 2300 lateral = self._parse_lateral() 2301 if lateral: 2302 return lateral 2303 2304 unnest = self._parse_unnest() 2305 if unnest: 2306 return unnest 2307 2308 values = self._parse_derived_table_values() 2309 if values: 2310 return values 2311 2312 subquery = self._parse_select(table=True) 2313 if subquery: 2314 if not subquery.args.get("pivots"): 2315 subquery.set("pivots", self._parse_pivots()) 2316 return subquery 2317 2318 this: exp.Expression = self._parse_table_parts(schema=schema) 2319 2320 if schema: 2321 return self._parse_schema(this=this) 2322 2323 if self.ALIAS_POST_TABLESAMPLE: 2324 table_sample = self._parse_table_sample() 2325 2326 alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 2327 if alias: 2328 this.set("alias", alias) 2329 2330 if not this.args.get("pivots"): 2331 this.set("pivots", self._parse_pivots()) 2332 2333 if self._match_pair(TokenType.WITH, TokenType.L_PAREN): 2334 this.set( 2335 "hints", 2336 self._parse_csv(lambda: self._parse_function() or self._parse_var(any_token=True)), 2337 ) 2338 self._match_r_paren() 2339 2340 if not self.ALIAS_POST_TABLESAMPLE: 2341 table_sample = self._parse_table_sample() 2342 2343 if table_sample: 2344 table_sample.set("this", this) 2345 this = table_sample 2346 2347 return this 2348 2349 def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]: 2350 if not self._match(TokenType.UNNEST): 2351 return None 2352 2353 expressions = self._parse_wrapped_csv(self._parse_type) 2354 ordinality = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 2355 2356 alias = self._parse_table_alias() if with_alias else None 2357 2358 if alias and self.UNNEST_COLUMN_ONLY: 2359 if 
alias.args.get("columns"): 2360 self.raise_error("Unexpected extra column alias in unnest.") 2361 2362 alias.set("columns", [alias.this]) 2363 alias.set("this", None) 2364 2365 offset = None 2366 if self._match_pair(TokenType.WITH, TokenType.OFFSET): 2367 self._match(TokenType.ALIAS) 2368 offset = self._parse_id_var() or exp.to_identifier("offset") 2369 2370 return self.expression( 2371 exp.Unnest, expressions=expressions, ordinality=ordinality, alias=alias, offset=offset 2372 ) 2373 2374 def _parse_derived_table_values(self) -> t.Optional[exp.Values]: 2375 is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES) 2376 if not is_derived and not self._match(TokenType.VALUES): 2377 return None 2378 2379 expressions = self._parse_csv(self._parse_value) 2380 alias = self._parse_table_alias() 2381 2382 if is_derived: 2383 self._match_r_paren() 2384 2385 return self.expression( 2386 exp.Values, expressions=expressions, alias=alias or self._parse_table_alias() 2387 ) 2388 2389 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 2390 if not self._match(TokenType.TABLE_SAMPLE) and not ( 2391 as_modifier and self._match_text_seq("USING", "SAMPLE") 2392 ): 2393 return None 2394 2395 bucket_numerator = None 2396 bucket_denominator = None 2397 bucket_field = None 2398 percent = None 2399 rows = None 2400 size = None 2401 seed = None 2402 2403 kind = ( 2404 self._prev.text if self._prev.token_type == TokenType.TABLE_SAMPLE else "USING SAMPLE" 2405 ) 2406 method = self._parse_var(tokens=(TokenType.ROW,)) 2407 2408 self._match(TokenType.L_PAREN) 2409 2410 num = self._parse_number() 2411 2412 if self._match_text_seq("BUCKET"): 2413 bucket_numerator = self._parse_number() 2414 self._match_text_seq("OUT", "OF") 2415 bucket_denominator = bucket_denominator = self._parse_number() 2416 self._match(TokenType.ON) 2417 bucket_field = self._parse_field() 2418 elif self._match_set((TokenType.PERCENT, TokenType.MOD)): 2419 percent = num 2420 elif 
self._match(TokenType.ROWS): 2421 rows = num 2422 else: 2423 size = num 2424 2425 self._match(TokenType.R_PAREN) 2426 2427 if self._match(TokenType.L_PAREN): 2428 method = self._parse_var() 2429 seed = self._match(TokenType.COMMA) and self._parse_number() 2430 self._match_r_paren() 2431 elif self._match_texts(("SEED", "REPEATABLE")): 2432 seed = self._parse_wrapped(self._parse_number) 2433 2434 return self.expression( 2435 exp.TableSample, 2436 method=method, 2437 bucket_numerator=bucket_numerator, 2438 bucket_denominator=bucket_denominator, 2439 bucket_field=bucket_field, 2440 percent=percent, 2441 rows=rows, 2442 size=size, 2443 seed=seed, 2444 kind=kind, 2445 ) 2446 2447 def _parse_pivots(self) -> t.List[t.Optional[exp.Expression]]: 2448 return list(iter(self._parse_pivot, None)) 2449 2450 # https://duckdb.org/docs/sql/statements/pivot 2451 def _parse_simplified_pivot(self) -> exp.Pivot: 2452 def _parse_on() -> t.Optional[exp.Expression]: 2453 this = self._parse_bitwise() 2454 return self._parse_in(this) if self._match(TokenType.IN) else this 2455 2456 this = self._parse_table() 2457 expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on) 2458 using = self._match(TokenType.USING) and self._parse_csv( 2459 lambda: self._parse_alias(self._parse_function()) 2460 ) 2461 group = self._parse_group() 2462 return self.expression( 2463 exp.Pivot, this=this, expressions=expressions, using=using, group=group 2464 ) 2465 2466 def _parse_pivot(self) -> t.Optional[exp.Pivot]: 2467 index = self._index 2468 2469 if self._match(TokenType.PIVOT): 2470 unpivot = False 2471 elif self._match(TokenType.UNPIVOT): 2472 unpivot = True 2473 else: 2474 return None 2475 2476 expressions = [] 2477 field = None 2478 2479 if not self._match(TokenType.L_PAREN): 2480 self._retreat(index) 2481 return None 2482 2483 if unpivot: 2484 expressions = self._parse_csv(self._parse_column) 2485 else: 2486 expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function())) 2487 
        if not expressions:
            self.raise_error("Failed to parse PIVOT's aggregation list")

        if not self._match(TokenType.FOR):
            self.raise_error("Expecting FOR")

        value = self._parse_column()

        if not self._match(TokenType.IN):
            self.raise_error("Expecting IN")

        field = self._parse_in(value, alias=True)

        self._match_r_paren()

        pivot = self.expression(exp.Pivot, expressions=expressions, field=field, unpivot=unpivot)

        # Only the last pivot in a chain may carry the table alias.
        if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False):
            pivot.set("alias", self._parse_table_alias())

        if not unpivot:
            names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions))

            # Synthesize the output column names from aggregation aliases and
            # the IN-list values, honoring the dialect's naming flags.
            columns: t.List[exp.Expression] = []
            for fld in pivot.args["field"].expressions:
                field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name
                for name in names:
                    if self.PREFIXED_PIVOT_COLUMNS:
                        name = f"{name}_{field_name}" if name else field_name
                    else:
                        name = f"{field_name}_{name}" if name else field_name

                    columns.append(exp.to_identifier(name))

            pivot.set("columns", columns)

        return pivot

    def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]:
        """Return the alias of each pivot aggregation (dialects may override)."""
        return [agg.alias for agg in aggregations]

    def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]:
        """Parse a WHERE clause into an exp.Where node."""
        if not skip_where_token and not self._match(TokenType.WHERE):
            return None

        return self.expression(
            exp.Where, comments=self._prev_comments, this=self._parse_conjunction()
        )

    def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]:
        """Parse a GROUP BY clause (expressions, GROUPING SETS, ROLLUP, CUBE, TOTALS)."""
        if not skip_group_by_token and not self._match(TokenType.GROUP_BY):
            return None

        elements = defaultdict(list)

        while True:
            expressions = self._parse_csv(self._parse_conjunction)
            if expressions:
                elements["expressions"].extend(expressions)

            grouping_sets = self._parse_grouping_sets()
            if grouping_sets:
                elements["grouping_sets"].extend(grouping_sets)

            rollup = None
            cube = None
            totals = None

            # WITH ROLLUP / WITH CUBE take no column list; bare ROLLUP/CUBE do.
            with_ = self._match(TokenType.WITH)
            if self._match(TokenType.ROLLUP):
                rollup = with_ or self._parse_wrapped_csv(self._parse_column)
                elements["rollup"].extend(ensure_list(rollup))

            if self._match(TokenType.CUBE):
                cube = with_ or self._parse_wrapped_csv(self._parse_column)
                elements["cube"].extend(ensure_list(cube))

            if self._match_text_seq("TOTALS"):
                totals = True
                elements["totals"] = True  # type: ignore

            # Keep looping only while grouping modifiers keep appearing.
            if not (grouping_sets or rollup or cube or totals):
                break

        return self.expression(exp.Group, **elements)  # type: ignore

    def _parse_grouping_sets(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]:
        """Parse GROUPING SETS (...) into a list of grouping-set expressions."""
        if not self._match(TokenType.GROUPING_SETS):
            return None

        return self._parse_wrapped_csv(self._parse_grouping_set)

    def _parse_grouping_set(self) -> t.Optional[exp.Expression]:
        """Parse a single grouping set: either a parenthesized tuple or a column."""
        if self._match(TokenType.L_PAREN):
            grouping_set = self._parse_csv(self._parse_column)
            self._match_r_paren()
            return self.expression(exp.Tuple, expressions=grouping_set)

        return self._parse_column()

    def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]:
        """Parse a HAVING clause into an exp.Having node."""
        if not skip_having_token and not self._match(TokenType.HAVING):
            return None
        return self.expression(exp.Having, this=self._parse_conjunction())

    def _parse_qualify(self) -> t.Optional[exp.Qualify]:
        """Parse a QUALIFY clause into an exp.Qualify node."""
        if not self._match(TokenType.QUALIFY):
            return None
        return self.expression(exp.Qualify, this=self._parse_conjunction())

    def _parse_order(
        self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse an ORDER BY clause; returns `this` unchanged when there is none."""
        if not skip_order_token and not self._match(TokenType.ORDER_BY):
            return this

        return self.expression(
            exp.Order, this=this, expressions=self._parse_csv(self._parse_ordered)
        )

    def _parse_sort(self, exp_class: t.Type[E], *texts: str) -> t.Optional[E]:
        """Parse a sort-like clause (e.g. SORT BY / CLUSTER BY) introduced by `texts`."""
        if not self._match_text_seq(*texts):
            return None
        return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered))

    def _parse_ordered(self) -> exp.Ordered:
        """Parse one ordering term: expr [ASC|DESC] [NULLS FIRST|LAST]."""
        this = self._parse_conjunction()
        self._match(TokenType.ASC)

        is_desc = self._match(TokenType.DESC)
        is_nulls_first = self._match_text_seq("NULLS", "FIRST")
        is_nulls_last = self._match_text_seq("NULLS", "LAST")
        desc = is_desc or False
        asc = not desc
        nulls_first = is_nulls_first or False
        explicitly_null_ordered = is_nulls_first or is_nulls_last

        # When null ordering is implicit, derive it from the dialect's
        # NULL_ORDERING setting so transpilation stays deterministic.
        if (
            not explicitly_null_ordered
            and (
                (asc and self.NULL_ORDERING == "nulls_are_small")
                or (desc and self.NULL_ORDERING != "nulls_are_small")
            )
            and self.NULL_ORDERING != "nulls_are_last"
        ):
            nulls_first = True

        return self.expression(exp.Ordered, this=this, desc=desc, nulls_first=nulls_first)

    def _parse_limit(
        self, this: t.Optional[exp.Expression] = None, top: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse LIMIT/TOP or FETCH; returns `this` unchanged if neither is present."""
        if self._match(TokenType.TOP if top else TokenType.LIMIT):
            limit_paren = self._match(TokenType.L_PAREN)
            expression = self._parse_number() if top else self._parse_term()

            # MySQL-style LIMIT offset, count
            if self._match(TokenType.COMMA):
                offset = expression
                expression = self._parse_term()
            else:
                offset = None

            limit_exp = self.expression(exp.Limit, this=this, expression=expression, offset=offset)

            if limit_paren:
                self._match_r_paren()

            return limit_exp

        if self._match(TokenType.FETCH):
            direction = self._match_set((TokenType.FIRST, TokenType.NEXT))
            direction = self._prev.text if direction else "FIRST"

            count = self._parse_number()
            percent = self._match(TokenType.PERCENT)

            self._match_set((TokenType.ROW, TokenType.ROWS))

            only = self._match_text_seq("ONLY")
            with_ties = self._match_text_seq("WITH", "TIES")

            if only and with_ties:
                self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause")

            return self.expression(
                exp.Fetch,
                direction=direction,
                count=count,
                percent=percent,
                with_ties=with_ties,
            )

        return this

    def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        """Parse an OFFSET clause; returns `this` unchanged if absent."""
        if not self._match(TokenType.OFFSET):
            return this

        count = self._parse_number()
        self._match_set((TokenType.ROW, TokenType.ROWS))
        return self.expression(exp.Offset, this=this, expression=count)

    def _parse_locks(self) -> t.List[exp.Lock]:
        """Parse FOR UPDATE / FOR SHARE / LOCK IN SHARE MODE locking clauses."""
        locks = []
        while True:
            if self._match_text_seq("FOR", "UPDATE"):
                update = True
            elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq(
                "LOCK", "IN", "SHARE", "MODE"
            ):
                update = False
            else:
                break

            expressions = None
            if self._match_text_seq("OF"):
                expressions = self._parse_csv(lambda: self._parse_table(schema=True))

            # wait: True = NOWAIT, False = SKIP LOCKED, expression = WAIT <n>
            wait: t.Optional[bool | exp.Expression] = None
            if self._match_text_seq("NOWAIT"):
                wait = True
            elif self._match_text_seq("WAIT"):
                wait = self._parse_primary()
            elif self._match_text_seq("SKIP", "LOCKED"):
                wait = False

            locks.append(
                self.expression(exp.Lock, update=update, expressions=expressions, wait=wait)
            )

        return locks

    def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse trailing UNION/EXCEPT/INTERSECT operations onto `this`."""
        if not self._match_set(self.SET_OPERATIONS):
            return this

        token_type = self._prev.token_type

        if token_type == TokenType.UNION:
            expression = exp.Union
        elif token_type == TokenType.EXCEPT:
            expression = exp.Except
        else:
            expression = exp.Intersect

        return self.expression(
            expression,
            this=this,
            # DISTINCT is the default unless ALL is given explicitly.
            distinct=self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL),
            expression=self._parse_set_operations(self._parse_select(nested=True)),
        )

    def _parse_expression(self) -> t.Optional[exp.Expression]:
        """Parse a full expression, including a trailing alias."""
        return self._parse_alias(self._parse_conjunction())

    def _parse_conjunction(self) -> t.Optional[exp.Expression]:
        """Parse AND/OR conjunctions (precedence level above equality)."""
        return self._parse_tokens(self._parse_equality, self.CONJUNCTION)

    def _parse_equality(self) -> t.Optional[exp.Expression]:
        """Parse equality operators (precedence level above comparison)."""
        return self._parse_tokens(self._parse_comparison, self.EQUALITY)

    def _parse_comparison(self) -> t.Optional[exp.Expression]:
        """Parse comparison operators (precedence level above range predicates)."""
        return self._parse_tokens(self._parse_range, self.COMPARISON)

    def _parse_range(self) -> t.Optional[exp.Expression]:
        """Parse range predicates (BETWEEN, IN, LIKE, ISNULL, IS, ...)."""
        this = self._parse_bitwise()
        negate = self._match(TokenType.NOT)

        if self._match_set(self.RANGE_PARSERS):
            expression = self.RANGE_PARSERS[self._prev.token_type](self, this)
            if not expression:
                return this

            this = expression
        elif self._match(TokenType.ISNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())

        # Postgres supports ISNULL and NOTNULL for conditions.
2766 # https://blog.andreiavram.ro/postgresql-null-composite-type/ 2767 if self._match(TokenType.NOTNULL): 2768 this = self.expression(exp.Is, this=this, expression=exp.Null()) 2769 this = self.expression(exp.Not, this=this) 2770 2771 if negate: 2772 this = self.expression(exp.Not, this=this) 2773 2774 if self._match(TokenType.IS): 2775 this = self._parse_is(this) 2776 2777 return this 2778 2779 def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 2780 index = self._index - 1 2781 negate = self._match(TokenType.NOT) 2782 2783 if self._match_text_seq("DISTINCT", "FROM"): 2784 klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ 2785 return self.expression(klass, this=this, expression=self._parse_expression()) 2786 2787 expression = self._parse_null() or self._parse_boolean() 2788 if not expression: 2789 self._retreat(index) 2790 return None 2791 2792 this = self.expression(exp.Is, this=this, expression=expression) 2793 return self.expression(exp.Not, this=this) if negate else this 2794 2795 def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In: 2796 unnest = self._parse_unnest(with_alias=False) 2797 if unnest: 2798 this = self.expression(exp.In, this=this, unnest=unnest) 2799 elif self._match(TokenType.L_PAREN): 2800 expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias)) 2801 2802 if len(expressions) == 1 and isinstance(expressions[0], exp.Subqueryable): 2803 this = self.expression(exp.In, this=this, query=expressions[0]) 2804 else: 2805 this = self.expression(exp.In, this=this, expressions=expressions) 2806 2807 self._match_r_paren(this) 2808 else: 2809 this = self.expression(exp.In, this=this, field=self._parse_field()) 2810 2811 return this 2812 2813 def _parse_between(self, this: exp.Expression) -> exp.Between: 2814 low = self._parse_bitwise() 2815 self._match(TokenType.AND) 2816 high = self._parse_bitwise() 2817 return self.expression(exp.Between, this=this, low=low, 
high=high) 2818 2819 def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 2820 if not self._match(TokenType.ESCAPE): 2821 return this 2822 return self.expression(exp.Escape, this=this, expression=self._parse_string()) 2823 2824 def _parse_interval(self) -> t.Optional[exp.Interval]: 2825 if not self._match(TokenType.INTERVAL): 2826 return None 2827 2828 this = self._parse_primary() or self._parse_term() 2829 unit = self._parse_function() or self._parse_var() 2830 2831 # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse 2832 # each INTERVAL expression into this canonical form so it's easy to transpile 2833 if this and this.is_number: 2834 this = exp.Literal.string(this.name) 2835 elif this and this.is_string: 2836 parts = this.name.split() 2837 2838 if len(parts) == 2: 2839 if unit: 2840 # this is not actually a unit, it's something else 2841 unit = None 2842 self._retreat(self._index - 1) 2843 else: 2844 this = exp.Literal.string(parts[0]) 2845 unit = self.expression(exp.Var, this=parts[1]) 2846 2847 return self.expression(exp.Interval, this=this, unit=unit) 2848 2849 def _parse_bitwise(self) -> t.Optional[exp.Expression]: 2850 this = self._parse_term() 2851 2852 while True: 2853 if self._match_set(self.BITWISE): 2854 this = self.expression( 2855 self.BITWISE[self._prev.token_type], this=this, expression=self._parse_term() 2856 ) 2857 elif self._match_pair(TokenType.LT, TokenType.LT): 2858 this = self.expression( 2859 exp.BitwiseLeftShift, this=this, expression=self._parse_term() 2860 ) 2861 elif self._match_pair(TokenType.GT, TokenType.GT): 2862 this = self.expression( 2863 exp.BitwiseRightShift, this=this, expression=self._parse_term() 2864 ) 2865 else: 2866 break 2867 2868 return this 2869 2870 def _parse_term(self) -> t.Optional[exp.Expression]: 2871 return self._parse_tokens(self._parse_factor, self.TERM) 2872 2873 def _parse_factor(self) -> t.Optional[exp.Expression]: 2874 return 
self._parse_tokens(self._parse_unary, self.FACTOR) 2875 2876 def _parse_unary(self) -> t.Optional[exp.Expression]: 2877 if self._match_set(self.UNARY_PARSERS): 2878 return self.UNARY_PARSERS[self._prev.token_type](self) 2879 return self._parse_at_time_zone(self._parse_type()) 2880 2881 def _parse_type(self) -> t.Optional[exp.Expression]: 2882 interval = self._parse_interval() 2883 if interval: 2884 return interval 2885 2886 index = self._index 2887 data_type = self._parse_types(check_func=True) 2888 this = self._parse_column() 2889 2890 if data_type: 2891 if isinstance(this, exp.Literal): 2892 parser = self.TYPE_LITERAL_PARSERS.get(data_type.this) 2893 if parser: 2894 return parser(self, this, data_type) 2895 return self.expression(exp.Cast, this=this, to=data_type) 2896 if not data_type.expressions: 2897 self._retreat(index) 2898 return self._parse_column() 2899 return self._parse_column_ops(data_type) 2900 2901 return this 2902 2903 def _parse_type_size(self) -> t.Optional[exp.DataTypeSize]: 2904 this = self._parse_type() 2905 if not this: 2906 return None 2907 2908 return self.expression( 2909 exp.DataTypeSize, this=this, expression=self._parse_var(any_token=True) 2910 ) 2911 2912 def _parse_types( 2913 self, check_func: bool = False, schema: bool = False 2914 ) -> t.Optional[exp.Expression]: 2915 index = self._index 2916 2917 prefix = self._match_text_seq("SYSUDTLIB", ".") 2918 2919 if not self._match_set(self.TYPE_TOKENS): 2920 return None 2921 2922 type_token = self._prev.token_type 2923 2924 if type_token == TokenType.PSEUDO_TYPE: 2925 return self.expression(exp.PseudoType, this=self._prev.text) 2926 2927 nested = type_token in self.NESTED_TYPE_TOKENS 2928 is_struct = type_token == TokenType.STRUCT 2929 expressions = None 2930 maybe_func = False 2931 2932 if self._match(TokenType.L_PAREN): 2933 if is_struct: 2934 expressions = self._parse_csv(self._parse_struct_types) 2935 elif nested: 2936 expressions = self._parse_csv( 2937 lambda: 
self._parse_types(check_func=check_func, schema=schema) 2938 ) 2939 else: 2940 expressions = self._parse_csv(self._parse_type_size) 2941 2942 if not expressions or not self._match(TokenType.R_PAREN): 2943 self._retreat(index) 2944 return None 2945 2946 maybe_func = True 2947 2948 if self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET): 2949 this = exp.DataType( 2950 this=exp.DataType.Type.ARRAY, 2951 expressions=[exp.DataType.build(type_token.value, expressions=expressions)], 2952 nested=True, 2953 ) 2954 2955 while self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET): 2956 this = exp.DataType(this=exp.DataType.Type.ARRAY, expressions=[this], nested=True) 2957 2958 return this 2959 2960 if self._match(TokenType.L_BRACKET): 2961 self._retreat(index) 2962 return None 2963 2964 values: t.Optional[t.List[t.Optional[exp.Expression]]] = None 2965 if nested and self._match(TokenType.LT): 2966 if is_struct: 2967 expressions = self._parse_csv(self._parse_struct_types) 2968 else: 2969 expressions = self._parse_csv( 2970 lambda: self._parse_types(check_func=check_func, schema=schema) 2971 ) 2972 2973 if not self._match(TokenType.GT): 2974 self.raise_error("Expecting >") 2975 2976 if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)): 2977 values = self._parse_csv(self._parse_conjunction) 2978 self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN)) 2979 2980 value: t.Optional[exp.Expression] = None 2981 if type_token in self.TIMESTAMPS: 2982 if self._match_text_seq("WITH", "TIME", "ZONE") or type_token == TokenType.TIMESTAMPTZ: 2983 value = exp.DataType(this=exp.DataType.Type.TIMESTAMPTZ, expressions=expressions) 2984 elif ( 2985 self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE") 2986 or type_token == TokenType.TIMESTAMPLTZ 2987 ): 2988 value = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions) 2989 elif self._match_text_seq("WITHOUT", "TIME", "ZONE"): 2990 if type_token == TokenType.TIME: 2991 value = 
exp.DataType(this=exp.DataType.Type.TIME, expressions=expressions) 2992 else: 2993 value = exp.DataType(this=exp.DataType.Type.TIMESTAMP, expressions=expressions) 2994 2995 maybe_func = maybe_func and value is None 2996 2997 if value is None: 2998 value = exp.DataType(this=exp.DataType.Type.TIMESTAMP, expressions=expressions) 2999 elif type_token == TokenType.INTERVAL: 3000 unit = self._parse_var() 3001 3002 if not unit: 3003 value = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL) 3004 else: 3005 value = self.expression(exp.Interval, unit=unit) 3006 3007 if maybe_func and check_func: 3008 index2 = self._index 3009 peek = self._parse_string() 3010 3011 if not peek: 3012 self._retreat(index) 3013 return None 3014 3015 self._retreat(index2) 3016 3017 if value: 3018 return value 3019 3020 return exp.DataType( 3021 this=exp.DataType.Type[type_token.value.upper()], 3022 expressions=expressions, 3023 nested=nested, 3024 values=values, 3025 prefix=prefix, 3026 ) 3027 3028 def _parse_struct_types(self) -> t.Optional[exp.Expression]: 3029 this = self._parse_type() or self._parse_id_var() 3030 self._match(TokenType.COLON) 3031 return self._parse_column_def(this) 3032 3033 def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3034 if not self._match_text_seq("AT", "TIME", "ZONE"): 3035 return this 3036 return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary()) 3037 3038 def _parse_column(self) -> t.Optional[exp.Expression]: 3039 this = self._parse_field() 3040 if isinstance(this, exp.Identifier): 3041 this = self.expression(exp.Column, this=this) 3042 elif not this: 3043 return self._parse_bracket(this) 3044 return self._parse_column_ops(this) 3045 3046 def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3047 this = self._parse_bracket(this) 3048 3049 while self._match_set(self.COLUMN_OPERATORS): 3050 op_token = self._prev.token_type 3051 op = 
self.COLUMN_OPERATORS.get(op_token)

            if op_token == TokenType.DCOLON:
                # Postgres-style cast operator: <expr>::<type>
                field = self._parse_types()
                if not field:
                    self.raise_error("Expected type")
            elif op and self._curr:
                # Operator with a single literal operand (e.g. JSON path ops).
                self._advance()
                value = self._prev.text
                field = (
                    exp.Literal.number(value)
                    if self._prev.token_type == TokenType.NUMBER
                    else exp.Literal.string(value)
                )
            else:
                # Plain dot access: parse the next field/function/identifier.
                field = self._parse_field(anonymous_func=True, any_token=True)

            if isinstance(field, exp.Func):
                # bigquery allows function calls like x.y.count(...)
                # SAFE.SUBSTR(...)
                # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules
                this = self._replace_columns_with_dots(this)

            if op:
                this = op(self, this, field)
            elif isinstance(this, exp.Column) and not this.args.get("catalog"):
                # Shift the column parts: the previous column pieces become
                # table/db/catalog qualifiers of the newly parsed field.
                this = self.expression(
                    exp.Column,
                    this=field,
                    table=this.this,
                    db=this.args.get("table"),
                    catalog=this.args.get("db"),
                )
            else:
                this = self.expression(exp.Dot, this=this, expression=field)
            this = self._parse_bracket(this)
        return this

    def _parse_primary(self) -> t.Optional[exp.Expression]:
        """Parse a primary expression: a literal, an implicitly concatenated
        string run, a `.N` float shorthand, or a parenthesized expression,
        tuple or subquery. Returns None when no primary is present."""
        if self._match_set(self.PRIMARY_PARSERS):
            token_type = self._prev.token_type
            primary = self.PRIMARY_PARSERS[token_type](self, self._prev)

            if token_type == TokenType.STRING:
                # Adjacent string literals concatenate ('a' 'b' -> CONCAT).
                expressions = [primary]
                while self._match(TokenType.STRING):
                    expressions.append(exp.Literal.string(self._prev.text))

                if len(expressions) > 1:
                    return self.expression(exp.Concat, expressions=expressions)

            return primary

        if self._match_pair(TokenType.DOT, TokenType.NUMBER):
            # Leading-dot float literal, e.g. `.5` -> 0.5
            return exp.Literal.number(f"0.{self._prev.text}")

        if self._match(TokenType.L_PAREN):
            comments = self._prev_comments
            query = self._parse_select()

            if query:
                expressions = [query]
            else:
                expressions = self._parse_csv(self._parse_expression)

            this = self._parse_query_modifiers(seq_get(expressions, 0))

            if isinstance(this, exp.Subqueryable):
                this = self._parse_set_operations(
                    self._parse_subquery(this=this, parse_alias=False)
                )
            elif len(expressions) > 1:
                this = self.expression(exp.Tuple, expressions=expressions)
            else:
                this = self.expression(exp.Paren, this=self._parse_set_operations(this))

            if this:
                this.add_comments(comments)

            self._match_r_paren(expression=this)
            return this

        return None

    def _parse_field(
        self,
        any_token: bool = False,
        tokens: t.Optional[t.Collection[TokenType]] = None,
        anonymous_func: bool = False,
    ) -> t.Optional[exp.Expression]:
        """Parse a field: first try a primary, then a function call, then an identifier."""
        return (
            self._parse_primary()
            or self._parse_function(anonymous=anonymous_func)
            or self._parse_id_var(any_token=any_token, tokens=tokens)
        )

    def _parse_function(
        self,
        functions: t.Optional[t.Dict[str, t.Callable]] = None,
        anonymous: bool = False,
        optional_parens: bool = True,
    ) -> t.Optional[exp.Expression]:
        """Parse a function call at the current position.

        Dispatch order: no-paren parsers, no-paren builtin functions, custom
        FUNCTION_PARSERS, subquery predicates, then known/anonymous functions.
        Returns None if the current tokens do not form a function call.
        """
        if not self._curr:
            return None

        token_type = self._curr.token_type

        if optional_parens and self._match_set(self.NO_PAREN_FUNCTION_PARSERS):
            return self.NO_PAREN_FUNCTION_PARSERS[token_type](self)

        if not self._next or self._next.token_type != TokenType.L_PAREN:
            # No opening paren: only no-paren builtins (e.g. CURRENT_DATE) qualify.
            if optional_parens and token_type in self.NO_PAREN_FUNCTIONS:
                self._advance()
                return self.expression(self.NO_PAREN_FUNCTIONS[token_type])

            return None

        if token_type not in self.FUNC_TOKENS:
            return None

        this = self._curr.text
        upper = this.upper()
        self._advance(2)  # consume the function name and the opening paren

        parser = self.FUNCTION_PARSERS.get(upper)

        if parser and not anonymous:
            this = parser(self)
        else:
            subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type)

            if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH):
                this = self.expression(subquery_predicate, this=self._parse_select())
                self._match_r_paren()
                return this

            if functions is None:
                functions = self.FUNCTIONS

            function = functions.get(upper)

            alias = upper in self.FUNCTIONS_WITH_ALIASED_ARGS
            args = self._parse_csv(lambda: self._parse_lambda(alias=alias))

            if function and not anonymous:
                this = self.validate_expression(function(args), args)
            else:
                # Unknown function name: keep it as an Anonymous call node.
                this = self.expression(exp.Anonymous, this=this, expressions=args)

        self._match_r_paren(this)
        return self._parse_window(this)

    def _parse_function_parameter(self) -> t.Optional[exp.Expression]:
        """Parse a single function parameter as a column definition (name + optional type)."""
        return self._parse_column_def(self._parse_id_var())

    def _parse_user_defined_function(
        self, kind: t.Optional[TokenType] = None
    ) -> t.Optional[exp.Expression]:
        """Parse a possibly dotted UDF name and, if present, its parenthesized parameter list."""
        this = self._parse_id_var()

        while self._match(TokenType.DOT):
            this = self.expression(exp.Dot, this=this, expression=self._parse_id_var())

        if not self._match(TokenType.L_PAREN):
            return this

        expressions = self._parse_csv(self._parse_function_parameter)
        self._match_r_paren()
        return self.expression(
            exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True
        )

    def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier:
        """Parse a charset introducer (e.g. _utf8'abc'); fall back to an identifier."""
        literal = self._parse_primary()
        if literal:
            return self.expression(exp.Introducer, this=token.text, expression=literal)

        return self.expression(exp.Identifier, this=token.text)

    def _parse_session_parameter(self) -> exp.SessionParameter:
        """Parse a session parameter reference, optionally qualified as kind.name."""
        kind = None
        this = self._parse_id_var() or self._parse_primary()

        if this and self._match(TokenType.DOT):
            kind = this.name
            this = self._parse_var() or self._parse_primary()

        return self.expression(exp.SessionParameter, this=this, kind=kind)

    def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]:
        """Parse a lambda ((x, y) -> expr), a DISTINCT list, or a plain select/expression."""
        index = self._index

        if self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(self._parse_id_var)

            if not self._match(TokenType.R_PAREN):
                self._retreat(index)
        else:
            expressions = [self._parse_id_var()]

        if self._match_set(self.LAMBDAS):
            return self.LAMBDAS[self._prev.token_type](self, expressions)

        # Not a lambda after all: rewind and parse as a regular expression.
        self._retreat(index)

        this: t.Optional[exp.Expression]

        if self._match(TokenType.DISTINCT):
            this = self.expression(
                exp.Distinct, expressions=self._parse_csv(self._parse_conjunction)
            )
        else:
            this = self._parse_select_or_expression(alias=alias)

        if isinstance(this, exp.EQ):
            left = this.this
            if isinstance(left, exp.Column):
                # Treat the LHS of `name = value` arguments as a variable, not a column.
                left.replace(exp.var(left.text("this")))

        return self._parse_limit(self._parse_order(self._parse_respect_or_ignore_nulls(this)))

    def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        """Parse a parenthesized schema (column defs / constraints); a nested
        SELECT at this position means there is no schema to parse."""
        index = self._index

        if not self.errors:
            try:
                if self._parse_select(nested=True):
                    return this
            except ParseError:
                pass
            finally:
                # Speculative parse only: discard errors and rewind.
                self.errors.clear()
                self._retreat(index)

        if not self._match(TokenType.L_PAREN):
            return this

        args = self._parse_csv(
            lambda: self._parse_constraint()
            or self._parse_column_def(self._parse_field(any_token=True))
        )

        self._match_r_paren()
        return self.expression(exp.Schema, this=this, expressions=args)

    def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse a column definition: name, optional type, then constraints."""
        # column defs are not really columns, they're identifiers
        if isinstance(this, exp.Column):
            this = this.this

        kind = self._parse_types(schema=True)

        if
self._match_text_seq("FOR", "ORDINALITY"):
            return self.expression(exp.ColumnDef, this=this, ordinality=True)

        constraints = []
        while True:
            constraint = self._parse_column_constraint()
            if not constraint:
                break
            constraints.append(constraint)

        if not kind and not constraints:
            # Bare identifier: no type and no constraints, so not a column def.
            return this

        return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints)

    def _parse_auto_increment(
        self,
    ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint:
        """Parse AUTO_INCREMENT, optionally with (start, increment) or START/INCREMENT values."""
        start = None
        increment = None

        if self._match(TokenType.L_PAREN, advance=False):
            args = self._parse_wrapped_csv(self._parse_bitwise)
            start = seq_get(args, 0)
            increment = seq_get(args, 1)
        elif self._match_text_seq("START"):
            start = self._parse_bitwise()
            self._match_text_seq("INCREMENT")
            increment = self._parse_bitwise()

        if start and increment:
            return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment)

        return exp.AutoIncrementColumnConstraint()

    def _parse_compress(self) -> exp.CompressColumnConstraint:
        """Parse a COMPRESS column constraint with either a wrapped list or a single value."""
        if self._match(TokenType.L_PAREN, advance=False):
            return self.expression(
                exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise)
            )

        return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise())

    def _parse_generated_as_identity(self) -> exp.GeneratedAsIdentityColumnConstraint:
        """Parse GENERATED {ALWAYS | BY DEFAULT} AS IDENTITY [(sequence options)].

        `this=True` marks ALWAYS, `this=False` marks BY DEFAULT.
        """
        if self._match_text_seq("BY", "DEFAULT"):
            on_null = self._match_pair(TokenType.ON, TokenType.NULL)
            this = self.expression(
                exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null
            )
        else:
            self._match_text_seq("ALWAYS")
            this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True)

        self._match(TokenType.ALIAS)
        identity = self._match_text_seq("IDENTITY")

        if self._match(TokenType.L_PAREN):
            if self._match_text_seq("START", "WITH"):
                this.set("start", self._parse_bitwise())
            if self._match_text_seq("INCREMENT", "BY"):
                this.set("increment", self._parse_bitwise())
            if self._match_text_seq("MINVALUE"):
                this.set("minvalue", self._parse_bitwise())
            if self._match_text_seq("MAXVALUE"):
                this.set("maxvalue", self._parse_bitwise())

            if self._match_text_seq("CYCLE"):
                this.set("cycle", True)
            elif self._match_text_seq("NO", "CYCLE"):
                this.set("cycle", False)

            if not identity:
                # GENERATED ... AS (<expression>) — a computed column, not an identity.
                this.set("expression", self._parse_bitwise())

            self._match_r_paren()

        return this

    def _parse_inline(self) -> exp.InlineLengthColumnConstraint:
        """Parse an INLINE [LENGTH] column constraint."""
        self._match_text_seq("LENGTH")
        return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise())

    def _parse_not_constraint(
        self,
    ) -> t.Optional[exp.NotNullColumnConstraint | exp.CaseSpecificColumnConstraint]:
        """Parse the constraint following NOT: NULL or CASESPECIFIC."""
        if self._match_text_seq("NULL"):
            return self.expression(exp.NotNullColumnConstraint)
        if self._match_text_seq("CASESPECIFIC"):
            return self.expression(exp.CaseSpecificColumnConstraint, not_=True)
        return None

    def _parse_column_constraint(self) -> t.Optional[exp.Expression]:
        """Parse one column constraint, optionally named via CONSTRAINT <name>."""
        if self._match(TokenType.CONSTRAINT):
            this = self._parse_id_var()
        else:
            this = None

        if self._match_texts(self.CONSTRAINT_PARSERS):
            return self.expression(
                exp.ColumnConstraint,
                this=this,
                kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self),
            )

        return this

    def _parse_constraint(self) -> t.Optional[exp.Expression]:
        """Parse a table constraint; without the CONSTRAINT keyword, fall back
        to an unnamed schema-level constraint."""
        if not self._match(TokenType.CONSTRAINT):
            return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS)

        this = self._parse_id_var()
        expressions = []

        while True:
            constraint = self._parse_unnamed_constraint() or
self._parse_function()
            if not constraint:
                break
            expressions.append(constraint)

        return self.expression(exp.Constraint, this=this, expressions=expressions)

    def _parse_unnamed_constraint(
        self, constraints: t.Optional[t.Collection[str]] = None
    ) -> t.Optional[exp.Expression]:
        """Parse a constraint introduced by a bare keyword (no CONSTRAINT name)."""
        if not self._match_texts(constraints or self.CONSTRAINT_PARSERS):
            return None

        constraint = self._prev.text.upper()
        if constraint not in self.CONSTRAINT_PARSERS:
            self.raise_error(f"No parser found for schema constraint {constraint}.")

        return self.CONSTRAINT_PARSERS[constraint](self)

    def _parse_unique(self) -> exp.UniqueColumnConstraint:
        """Parse UNIQUE [KEY] with an optional wrapped column list."""
        self._match_text_seq("KEY")
        return self.expression(
            exp.UniqueColumnConstraint, this=self._parse_schema(self._parse_id_var(any_token=False))
        )

    def _parse_key_constraint_options(self) -> t.List[str]:
        """Collect trailing key-constraint options (ON <event> <action>,
        DEFERRABLE, MATCH FULL, ...) as raw strings until none match."""
        options = []
        while True:
            if not self._curr:
                break

            if self._match(TokenType.ON):
                action = None
                on = self._advance_any() and self._prev.text

                if self._match_text_seq("NO", "ACTION"):
                    action = "NO ACTION"
                elif self._match_text_seq("CASCADE"):
                    action = "CASCADE"
                elif self._match_pair(TokenType.SET, TokenType.NULL):
                    action = "SET NULL"
                elif self._match_pair(TokenType.SET, TokenType.DEFAULT):
                    action = "SET DEFAULT"
                else:
                    self.raise_error("Invalid key constraint")

                options.append(f"ON {on} {action}")
            elif self._match_text_seq("NOT", "ENFORCED"):
                options.append("NOT ENFORCED")
            elif self._match_text_seq("DEFERRABLE"):
                options.append("DEFERRABLE")
            elif self._match_text_seq("INITIALLY", "DEFERRED"):
                options.append("INITIALLY DEFERRED")
            elif self._match_text_seq("NORELY"):
                options.append("NORELY")
            elif self._match_text_seq("MATCH", "FULL"):
                options.append("MATCH FULL")
            else:
                break

        return options

    def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]:
        """Parse REFERENCES <table> [(columns)] [options]; `match=False` skips
        the REFERENCES keyword check (already consumed by the caller)."""
        if match and not self._match(TokenType.REFERENCES):
            return None

        expressions = None
        this = self._parse_id_var()

        if self._match(TokenType.L_PAREN, advance=False):
            expressions = self._parse_wrapped_id_vars()

        options = self._parse_key_constraint_options()
        return self.expression(exp.Reference, this=this, expressions=expressions, options=options)

    def _parse_foreign_key(self) -> exp.ForeignKey:
        """Parse FOREIGN KEY (cols) REFERENCES ... [ON DELETE|UPDATE <action>]..."""
        expressions = self._parse_wrapped_id_vars()
        reference = self._parse_references()
        options = {}

        while self._match(TokenType.ON):
            if not self._match_set((TokenType.DELETE, TokenType.UPDATE)):
                self.raise_error("Expected DELETE or UPDATE")

            kind = self._prev.text.lower()

            if self._match_text_seq("NO", "ACTION"):
                action = "NO ACTION"
            elif self._match(TokenType.SET):
                self._match_set((TokenType.NULL, TokenType.DEFAULT))
                action = "SET " + self._prev.text.upper()
            else:
                self._advance()
                action = self._prev.text.upper()

            options[kind] = action

        return self.expression(
            exp.ForeignKey, expressions=expressions, reference=reference, **options  # type: ignore
        )

    def _parse_primary_key(
        self, wrapped_optional: bool = False, in_props: bool = False
    ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey:
        """Parse PRIMARY KEY either as a column constraint (no column list) or
        as a table-level key with a wrapped column list and options."""
        desc = (
            self._match_set((TokenType.ASC, TokenType.DESC))
            and self._prev.token_type == TokenType.DESC
        )

        if not in_props and not self._match(TokenType.L_PAREN, advance=False):
            return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc)

        expressions = self._parse_wrapped_csv(self._parse_field, optional=wrapped_optional)
        options = self._parse_key_constraint_options()
        return self.expression(exp.PrimaryKey, expressions=expressions, options=options)

    def
_parse_bracket(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse a bracketed suffix: array literal, struct literal ({...}),
        subscript/slice access, recursing for chained brackets."""
        if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)):
            return this

        bracket_kind = self._prev.token_type

        if self._match(TokenType.COLON):
            # Leading-colon slice, e.g. x[:n]
            expressions: t.List[t.Optional[exp.Expression]] = [
                self.expression(exp.Slice, expression=self._parse_conjunction())
            ]
        else:
            expressions = self._parse_csv(lambda: self._parse_slice(self._parse_conjunction()))

        # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs
        if bracket_kind == TokenType.L_BRACE:
            this = self.expression(exp.Struct, expressions=expressions)
        elif not this or this.name.upper() == "ARRAY":
            this = self.expression(exp.Array, expressions=expressions)
        else:
            # Subscript access: normalize indexes to the dialect's offset.
            expressions = apply_index_offset(this, expressions, -self.INDEX_OFFSET)
            this = self.expression(exp.Bracket, this=this, expressions=expressions)

        if not self._match(TokenType.R_BRACKET) and bracket_kind == TokenType.L_BRACKET:
            self.raise_error("Expected ]")
        elif not self._match(TokenType.R_BRACE) and bracket_kind == TokenType.L_BRACE:
            self.raise_error("Expected }")

        self._add_comments(this)
        return self._parse_bracket(this)

    def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Wrap `this` into a Slice if a colon follows (x[a:b])."""
        if self._match(TokenType.COLON):
            return self.expression(exp.Slice, this=this, expression=self._parse_conjunction())
        return this

    def _parse_case(self) -> t.Optional[exp.Expression]:
        """Parse CASE [operand] WHEN ... THEN ... [ELSE ...] END."""
        ifs = []
        default = None

        expression = self._parse_conjunction()

        while self._match(TokenType.WHEN):
            this = self._parse_conjunction()
            self._match(TokenType.THEN)
            then = self._parse_conjunction()
            ifs.append(self.expression(exp.If, this=this, true=then))

        if self._match(TokenType.ELSE):
            default = self._parse_conjunction()

        if not self._match(TokenType.END):
            self.raise_error("Expected END after CASE", self._prev)

        return self._parse_window(
            self.expression(exp.Case, this=expression, ifs=ifs, default=default)
        )

    def _parse_if(self) -> t.Optional[exp.Expression]:
        """Parse IF in either function form IF(c, t, f) or statement form
        IF c THEN t [ELSE f] END."""
        if self._match(TokenType.L_PAREN):
            args = self._parse_csv(self._parse_conjunction)
            this = self.validate_expression(exp.If.from_arg_list(args), args)
            self._match_r_paren()
        else:
            index = self._index - 1
            condition = self._parse_conjunction()

            if not condition:
                self._retreat(index)
                return None

            self._match(TokenType.THEN)
            true = self._parse_conjunction()
            false = self._parse_conjunction() if self._match(TokenType.ELSE) else None
            self._match(TokenType.END)
            this = self.expression(exp.If, this=condition, true=true, false=false)

        return self._parse_window(this)

    def _parse_extract(self) -> exp.Extract:
        """Parse EXTRACT(part FROM expr); a comma separator is also accepted."""
        this = self._parse_function() or self._parse_var() or self._parse_type()

        if self._match(TokenType.FROM):
            return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

        if not self._match(TokenType.COMMA):
            self.raise_error("Expected FROM or comma after EXTRACT", self._prev)

        return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

    def _parse_cast(self, strict: bool) -> exp.Expression:
        """Parse the body of CAST(expr AS type [FORMAT fmt]); `strict` chooses
        Cast vs TryCast."""
        this = self._parse_conjunction()

        if not self._match(TokenType.ALIAS):
            if self._match(TokenType.COMMA):
                # CAST(expr, 'type-string') variant
                return self.expression(
                    exp.CastToStrType, this=this, expression=self._parse_string()
                )
            else:
                self.raise_error("Expected AS after CAST")

        to = self._parse_types()

        if not to:
            self.raise_error("Expected TYPE after CAST")
        elif to.this == exp.DataType.Type.CHAR:
            if self._match(TokenType.CHARACTER_SET):
                to = self.expression(exp.CharacterSet,
this=self._parse_var_or_string())
        elif to.this in exp.DataType.TEMPORAL_TYPES and self._match(TokenType.FORMAT):
            # CAST(... AS DATE/TIMESTAMP FORMAT 'fmt') becomes StrToDate/StrToTime.
            fmt = self._parse_string()

            return self.expression(
                exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime,
                this=this,
                format=exp.Literal.string(
                    format_time(
                        fmt.this if fmt else "",
                        self.FORMAT_MAPPING or self.TIME_MAPPING,
                        self.FORMAT_TRIE or self.TIME_TRIE,
                    )
                ),
            )

        return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to)

    def _parse_concat(self) -> t.Optional[exp.Expression]:
        """Parse CONCAT arguments, applying dialect NULL-coalescing semantics."""
        args = self._parse_csv(self._parse_conjunction)
        if self.CONCAT_NULL_OUTPUTS_STRING:
            args = [exp.func("COALESCE", arg, exp.Literal.string("")) for arg in args]

        # Some dialects (e.g. Trino) don't allow a single-argument CONCAT call, so when
        # we find such a call we replace it with its argument.
        if len(args) == 1:
            return args[0]

        return self.expression(
            exp.Concat if self.STRICT_STRING_CONCAT else exp.SafeConcat, expressions=args
        )

    def _parse_string_agg(self) -> exp.Expression:
        """Parse STRING_AGG / GROUP_CONCAT arguments, including the Postgres
        trailing ORDER BY and the WITHIN GROUP (ORDER BY ...) forms."""
        expression: t.Optional[exp.Expression]

        if self._match(TokenType.DISTINCT):
            args = self._parse_csv(self._parse_conjunction)
            expression = self.expression(exp.Distinct, expressions=[seq_get(args, 0)])
        else:
            args = self._parse_csv(self._parse_conjunction)
            expression = seq_get(args, 0)

        index = self._index
        if not self._match(TokenType.R_PAREN):
            # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]])
            order = self._parse_order(this=expression)
            return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1))

        # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]).
        # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that
        # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them.
        if not self._match_text_seq("WITHIN", "GROUP"):
            self._retreat(index)
            return self.validate_expression(exp.GroupConcat.from_arg_list(args), args)

        self._match_l_paren()  # The corresponding match_r_paren will be called in parse_function (caller)
        order = self._parse_order(this=expression)
        return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1))

    def _parse_convert(self, strict: bool) -> t.Optional[exp.Expression]:
        """Parse CONVERT(expr USING charset) or CONVERT(expr, type) into a cast."""
        to: t.Optional[exp.Expression]
        this = self._parse_bitwise()

        if self._match(TokenType.USING):
            to = self.expression(exp.CharacterSet, this=self._parse_var())
        elif self._match(TokenType.COMMA):
            to = self._parse_bitwise()
        else:
            to = None

        # Swap the argument order if needed to produce the correct AST
        if self.CONVERT_TYPE_FIRST:
            this, to = to, this

        return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to)

    def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]:
        """
        There are generally two variants of the DECODE function:

        - DECODE(bin, charset)
        - DECODE(expression, search, result [, search, result] ... [, default])

        The second variant will always be parsed into a CASE expression. Note that NULL
        needs special treatment, since we need to explicitly check for it with `IS NULL`,
        instead of relying on pattern matching.
3725 """ 3726 args = self._parse_csv(self._parse_conjunction) 3727 3728 if len(args) < 3: 3729 return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1)) 3730 3731 expression, *expressions = args 3732 if not expression: 3733 return None 3734 3735 ifs = [] 3736 for search, result in zip(expressions[::2], expressions[1::2]): 3737 if not search or not result: 3738 return None 3739 3740 if isinstance(search, exp.Literal): 3741 ifs.append( 3742 exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result) 3743 ) 3744 elif isinstance(search, exp.Null): 3745 ifs.append( 3746 exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result) 3747 ) 3748 else: 3749 cond = exp.or_( 3750 exp.EQ(this=expression.copy(), expression=search), 3751 exp.and_( 3752 exp.Is(this=expression.copy(), expression=exp.Null()), 3753 exp.Is(this=search.copy(), expression=exp.Null()), 3754 copy=False, 3755 ), 3756 copy=False, 3757 ) 3758 ifs.append(exp.If(this=cond, true=result)) 3759 3760 return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None) 3761 3762 def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]: 3763 self._match_text_seq("KEY") 3764 key = self._parse_field() 3765 self._match(TokenType.COLON) 3766 self._match_text_seq("VALUE") 3767 value = self._parse_field() 3768 3769 if not key and not value: 3770 return None 3771 return self.expression(exp.JSONKeyValue, this=key, expression=value) 3772 3773 def _parse_json_object(self) -> exp.JSONObject: 3774 star = self._parse_star() 3775 expressions = [star] if star else self._parse_csv(self._parse_json_key_value) 3776 3777 null_handling = None 3778 if self._match_text_seq("NULL", "ON", "NULL"): 3779 null_handling = "NULL ON NULL" 3780 elif self._match_text_seq("ABSENT", "ON", "NULL"): 3781 null_handling = "ABSENT ON NULL" 3782 3783 unique_keys = None 3784 if self._match_text_seq("WITH", "UNIQUE"): 3785 unique_keys = True 3786 elif 
self._match_text_seq("WITHOUT", "UNIQUE"): 3787 unique_keys = False 3788 3789 self._match_text_seq("KEYS") 3790 3791 return_type = self._match_text_seq("RETURNING") and self._parse_type() 3792 format_json = self._match_text_seq("FORMAT", "JSON") 3793 encoding = self._match_text_seq("ENCODING") and self._parse_var() 3794 3795 return self.expression( 3796 exp.JSONObject, 3797 expressions=expressions, 3798 null_handling=null_handling, 3799 unique_keys=unique_keys, 3800 return_type=return_type, 3801 format_json=format_json, 3802 encoding=encoding, 3803 ) 3804 3805 def _parse_logarithm(self) -> exp.Func: 3806 # Default argument order is base, expression 3807 args = self._parse_csv(self._parse_range) 3808 3809 if len(args) > 1: 3810 if not self.LOG_BASE_FIRST: 3811 args.reverse() 3812 return exp.Log.from_arg_list(args) 3813 3814 return self.expression( 3815 exp.Ln if self.LOG_DEFAULTS_TO_LN else exp.Log, this=seq_get(args, 0) 3816 ) 3817 3818 def _parse_match_against(self) -> exp.MatchAgainst: 3819 expressions = self._parse_csv(self._parse_column) 3820 3821 self._match_text_seq(")", "AGAINST", "(") 3822 3823 this = self._parse_string() 3824 3825 if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"): 3826 modifier = "IN NATURAL LANGUAGE MODE" 3827 if self._match_text_seq("WITH", "QUERY", "EXPANSION"): 3828 modifier = f"{modifier} WITH QUERY EXPANSION" 3829 elif self._match_text_seq("IN", "BOOLEAN", "MODE"): 3830 modifier = "IN BOOLEAN MODE" 3831 elif self._match_text_seq("WITH", "QUERY", "EXPANSION"): 3832 modifier = "WITH QUERY EXPANSION" 3833 else: 3834 modifier = None 3835 3836 return self.expression( 3837 exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier 3838 ) 3839 3840 # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16 3841 def _parse_open_json(self) -> exp.OpenJSON: 3842 this = self._parse_bitwise() 3843 path = self._match(TokenType.COMMA) and self._parse_string() 3844 3845 def 
_parse_open_json_column_def() -> exp.OpenJSONColumnDef:
            # One WITH-clause column: name, type, optional path, optional AS JSON.
            this = self._parse_field(any_token=True)
            kind = self._parse_types()
            path = self._parse_string()
            as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON)

            return self.expression(
                exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json
            )

        expressions = None
        if self._match_pair(TokenType.R_PAREN, TokenType.WITH):
            self._match_l_paren()
            expressions = self._parse_csv(_parse_open_json_column_def)

        return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions)

    def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition:
        """Parse POSITION(needle IN haystack) or the comma-separated variant;
        `haystack_first` flips the comma-form argument order."""
        args = self._parse_csv(self._parse_bitwise)

        if self._match(TokenType.IN):
            return self.expression(
                exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0)
            )

        if haystack_first:
            haystack = seq_get(args, 0)
            needle = seq_get(args, 1)
        else:
            needle = seq_get(args, 0)
            haystack = seq_get(args, 1)

        return self.expression(
            exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2)
        )

    def _parse_join_hint(self, func_name: str) -> exp.JoinHint:
        """Parse the table list of a join hint function (e.g. BROADCAST(t1, t2))."""
        args = self._parse_csv(self._parse_table)
        return exp.JoinHint(this=func_name.upper(), expressions=args)

    def _parse_substring(self) -> exp.Substring:
        # Postgres supports the form: substring(string [from int] [for int])
        # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6

        args = self._parse_csv(self._parse_bitwise)

        if self._match(TokenType.FROM):
            args.append(self._parse_bitwise())
            if self._match(TokenType.FOR):
                args.append(self._parse_bitwise())

        return self.validate_expression(exp.Substring.from_arg_list(args), args)

    def _parse_trim(self) -> exp.Trim:
        # https://www.w3resource.com/sql/character-functions/trim.php
        # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html

        position = None
        collation = None

        if self._match_texts(self.TRIM_TYPES):
            position = self._prev.text.upper()

        expression = self._parse_bitwise()
        if self._match_set((TokenType.FROM, TokenType.COMMA)):
            # TRIM(chars FROM string): first expr was the trim characters.
            this = self._parse_bitwise()
        else:
            this = expression
            expression = None

        if self._match(TokenType.COLLATE):
            collation = self._parse_bitwise()

        return self.expression(
            exp.Trim, this=this, position=position, expression=expression, collation=collation
        )

    def _parse_window_clause(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]:
        """Parse a WINDOW clause as a list of named window definitions."""
        return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window)

    def _parse_named_window(self) -> t.Optional[exp.Expression]:
        """Parse one `name AS (window spec)` entry of a WINDOW clause."""
        return self._parse_window(self._parse_id_var(), alias=True)

    def _parse_respect_or_ignore_nulls(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        """Wrap `this` in IgnoreNulls/RespectNulls if the modifier follows."""
        if self._match_text_seq("IGNORE", "NULLS"):
            return self.expression(exp.IgnoreNulls, this=this)
        if self._match_text_seq("RESPECT", "NULLS"):
            return self.expression(exp.RespectNulls, this=this)
        return this

    def _parse_window(
        self, this: t.Optional[exp.Expression], alias: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse trailing window syntax after a function call: FILTER, WITHIN
        GROUP, IGNORE/RESPECT NULLS, and the OVER (...) specification; with
        `alias=True`, parse a named WINDOW-clause definition instead."""
        if self._match_pair(TokenType.FILTER, TokenType.L_PAREN):
            this = self.expression(exp.Filter, this=this, expression=self._parse_where())
            self._match_r_paren()

        # T-SQL allows the OVER (...) syntax after WITHIN GROUP.
        # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16
        if self._match_text_seq("WITHIN", "GROUP"):
            order = self._parse_wrapped(self._parse_order)
            this = self.expression(exp.WithinGroup, this=this, expression=order)

        # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER
        # Some dialects choose to implement and some do not.
        # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html

        # There is some code above in _parse_lambda that handles
        # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ...

        # The below changes handle
        # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ...

        # Oracle allows both formats
        # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html)
        # and Snowflake chose to do the same for familiarity
        # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes
        this = self._parse_respect_or_ignore_nulls(this)

        # bigquery select from window x AS (partition by ...)
3967 if alias: 3968 over = None 3969 self._match(TokenType.ALIAS) 3970 elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS): 3971 return this 3972 else: 3973 over = self._prev.text.upper() 3974 3975 if not self._match(TokenType.L_PAREN): 3976 return self.expression( 3977 exp.Window, this=this, alias=self._parse_id_var(False), over=over 3978 ) 3979 3980 window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS) 3981 3982 first = self._match(TokenType.FIRST) 3983 if self._match_text_seq("LAST"): 3984 first = False 3985 3986 partition = self._parse_partition_by() 3987 order = self._parse_order() 3988 kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text 3989 3990 if kind: 3991 self._match(TokenType.BETWEEN) 3992 start = self._parse_window_spec() 3993 self._match(TokenType.AND) 3994 end = self._parse_window_spec() 3995 3996 spec = self.expression( 3997 exp.WindowSpec, 3998 kind=kind, 3999 start=start["value"], 4000 start_side=start["side"], 4001 end=end["value"], 4002 end_side=end["side"], 4003 ) 4004 else: 4005 spec = None 4006 4007 self._match_r_paren() 4008 4009 return self.expression( 4010 exp.Window, 4011 this=this, 4012 partition_by=partition, 4013 order=order, 4014 spec=spec, 4015 alias=window_alias, 4016 over=over, 4017 first=first, 4018 ) 4019 4020 def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]: 4021 self._match(TokenType.BETWEEN) 4022 4023 return { 4024 "value": ( 4025 (self._match_text_seq("UNBOUNDED") and "UNBOUNDED") 4026 or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW") 4027 or self._parse_bitwise() 4028 ), 4029 "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text, 4030 } 4031 4032 def _parse_alias( 4033 self, this: t.Optional[exp.Expression], explicit: bool = False 4034 ) -> t.Optional[exp.Expression]: 4035 any_token = self._match(TokenType.ALIAS) 4036 4037 if explicit and not any_token: 4038 return this 4039 4040 if 
self._match(TokenType.L_PAREN): 4041 aliases = self.expression( 4042 exp.Aliases, 4043 this=this, 4044 expressions=self._parse_csv(lambda: self._parse_id_var(any_token)), 4045 ) 4046 self._match_r_paren(aliases) 4047 return aliases 4048 4049 alias = self._parse_id_var(any_token) 4050 4051 if alias: 4052 return self.expression(exp.Alias, this=this, alias=alias) 4053 4054 return this 4055 4056 def _parse_id_var( 4057 self, 4058 any_token: bool = True, 4059 tokens: t.Optional[t.Collection[TokenType]] = None, 4060 ) -> t.Optional[exp.Expression]: 4061 identifier = self._parse_identifier() 4062 4063 if identifier: 4064 return identifier 4065 4066 if (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS): 4067 quoted = self._prev.token_type == TokenType.STRING 4068 return exp.Identifier(this=self._prev.text, quoted=quoted) 4069 4070 return None 4071 4072 def _parse_string(self) -> t.Optional[exp.Expression]: 4073 if self._match(TokenType.STRING): 4074 return self.PRIMARY_PARSERS[TokenType.STRING](self, self._prev) 4075 return self._parse_placeholder() 4076 4077 def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]: 4078 return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True) 4079 4080 def _parse_number(self) -> t.Optional[exp.Expression]: 4081 if self._match(TokenType.NUMBER): 4082 return self.PRIMARY_PARSERS[TokenType.NUMBER](self, self._prev) 4083 return self._parse_placeholder() 4084 4085 def _parse_identifier(self) -> t.Optional[exp.Expression]: 4086 if self._match(TokenType.IDENTIFIER): 4087 return self.expression(exp.Identifier, this=self._prev.text, quoted=True) 4088 return self._parse_placeholder() 4089 4090 def _parse_var( 4091 self, any_token: bool = False, tokens: t.Optional[t.Collection[TokenType]] = None 4092 ) -> t.Optional[exp.Expression]: 4093 if ( 4094 (any_token and self._advance_any()) 4095 or self._match(TokenType.VAR) 4096 or (self._match_set(tokens) if tokens else False) 
        ):
            return self.expression(exp.Var, this=self._prev.text)
        return self._parse_placeholder()

    def _advance_any(self) -> t.Optional[Token]:
        """Consume and return the current token unless it is a reserved keyword."""
        if self._curr and self._curr.token_type not in self.RESERVED_KEYWORDS:
            self._advance()
            return self._prev
        return None

    def _parse_var_or_string(self) -> t.Optional[exp.Expression]:
        """Parse either a VAR token or a string literal."""
        return self._parse_var() or self._parse_string()

    def _parse_null(self) -> t.Optional[exp.Expression]:
        """Parse a NULL literal."""
        if self._match(TokenType.NULL):
            return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev)
        return None

    def _parse_boolean(self) -> t.Optional[exp.Expression]:
        """Parse a TRUE/FALSE literal."""
        if self._match(TokenType.TRUE):
            return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev)
        if self._match(TokenType.FALSE):
            return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev)
        return None

    def _parse_star(self) -> t.Optional[exp.Expression]:
        """Parse a star (*) projection, including EXCEPT/REPLACE extensions."""
        if self._match(TokenType.STAR):
            return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev)
        return None

    def _parse_parameter(self) -> exp.Parameter:
        """Parse a parameter reference, optionally wrapped in braces (e.g. @{x})."""
        wrapped = self._match(TokenType.L_BRACE)
        this = self._parse_var() or self._parse_identifier() or self._parse_primary()
        self._match(TokenType.R_BRACE)
        return self.expression(exp.Parameter, this=this, wrapped=wrapped)

    def _parse_placeholder(self) -> t.Optional[exp.Expression]:
        """Parse a placeholder token (?, :name, @var, ...) via PLACEHOLDER_PARSERS.

        If the registered sub-parser yields nothing, the consumed token is
        pushed back so other parsers can try it.
        """
        if self._match_set(self.PLACEHOLDER_PARSERS):
            placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self)
            if placeholder:
                return placeholder
            self._advance(-1)
        return None

    def _parse_except(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]:
        """Parse a star EXCEPT column list, with or without parentheses."""
        if not self._match(TokenType.EXCEPT):
            return None
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_column)
        return self._parse_csv(self._parse_column)

    def _parse_replace(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]:
        """Parse a star REPLACE expression list, with or without parentheses."""
        if not self._match(TokenType.REPLACE):
            return None
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_expression)
        return self._parse_csv(self._parse_expression)

    def _parse_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA
    ) -> t.List[t.Optional[exp.Expression]]:
        """Parse a separator-delimited list of items using ``parse_method``.

        Comments attached to each separator token are folded into the item
        parsed just before it; None results are dropped from the list.
        """
        parse_result = parse_method()
        items = [parse_result] if parse_result is not None else []

        while self._match(sep):
            self._add_comments(parse_result)
            parse_result = parse_method()
            if parse_result is not None:
                items.append(parse_result)

        return items

    def _parse_tokens(
        self, parse_method: t.Callable, expressions: t.Dict
    ) -> t.Optional[exp.Expression]:
        """Left-associatively fold binary operators from ``expressions``
        (token type -> expression class) over operands from ``parse_method``."""
        this = parse_method()

        while self._match_set(expressions):
            this = self.expression(
                expressions[self._prev.token_type],
                this=this,
                comments=self._prev_comments,
                expression=parse_method(),
            )

        return this

    def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[t.Optional[exp.Expression]]:
        """Parse a parenthesized, comma-separated identifier list."""
        return self._parse_wrapped_csv(self._parse_id_var, optional=optional)

    def _parse_wrapped_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False
    ) -> t.List[t.Optional[exp.Expression]]:
        """Parse a parenthesized, separator-delimited list of items."""
        return self._parse_wrapped(
            lambda: self._parse_csv(parse_method, sep=sep), optional=optional
        )

    def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any:
        """Run ``parse_method`` inside parentheses; parens are required unless
        ``optional`` is True. Raises on a missing opening paren."""
        wrapped = self._match(TokenType.L_PAREN)
        if not wrapped and not optional:
            self.raise_error("Expecting (")
        parse_result = parse_method()
        if wrapped:
            self._match_r_paren()
        return parse_result

    def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]:
        # Parse either a full SELECT or a scalar expression (with optional alias).
        return
self._parse_select() or self._parse_set_operations(
            self._parse_expression() if alias else self._parse_conjunction()
        )

    def _parse_ddl_select(self) -> t.Optional[exp.Expression]:
        # SELECT used inside DDL (e.g. CREATE TABLE ... AS): allow set operations
        # and query modifiers, but skip trailing subquery alias parsing.
        return self._parse_query_modifiers(
            self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False))
        )

    def _parse_transaction(self) -> exp.Transaction:
        """Parse BEGIN/START [kind] [TRANSACTION|WORK] [mode, ...]."""
        this = None
        if self._match_texts(self.TRANSACTION_KIND):
            this = self._prev.text

        self._match_texts({"TRANSACTION", "WORK"})

        modes = []
        while True:
            mode = []
            # A single mode can span several VAR tokens, e.g. "ISOLATION LEVEL ...".
            while self._match(TokenType.VAR):
                mode.append(self._prev.text)

            if mode:
                modes.append(" ".join(mode))
            if not self._match(TokenType.COMMA):
                break

        return self.expression(exp.Transaction, this=this, modes=modes)

    def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback:
        """Parse COMMIT/ROLLBACK [TRANSACTION|WORK] [TO SAVEPOINT x] [AND [NO] CHAIN].

        The COMMIT/ROLLBACK token itself was already consumed by the caller
        (self._prev distinguishes the two).
        """
        chain = None
        savepoint = None
        is_rollback = self._prev.token_type == TokenType.ROLLBACK

        self._match_texts({"TRANSACTION", "WORK"})

        if self._match_text_seq("TO"):
            self._match_text_seq("SAVEPOINT")
            savepoint = self._parse_id_var()

        if self._match(TokenType.AND):
            # AND CHAIN -> True, AND NO CHAIN -> False.
            chain = not self._match_text_seq("NO")
            self._match_text_seq("CHAIN")

        # NOTE(review): a parsed chain option is only attached to Commit, not
        # Rollback — confirm this is intentional for ROLLBACK AND CHAIN.
        if is_rollback:
            return self.expression(exp.Rollback, savepoint=savepoint)

        return self.expression(exp.Commit, chain=chain)

    def _parse_add_column(self) -> t.Optional[exp.Expression]:
        """Parse one ALTER TABLE ... ADD [COLUMN] [IF NOT EXISTS] <col def> item."""
        if not self._match_text_seq("ADD"):
            return None

        self._match(TokenType.COLUMN)
        exists_column = self._parse_exists(not_=True)
        expression = self._parse_column_def(self._parse_field(any_token=True))

        if expression:
            expression.set("exists", exists_column)

        # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns
        if self._match_texts(("FIRST", "AFTER")):
            position = self._prev.text
            column_position = self.expression(
                exp.ColumnPosition, this=self._parse_column(), position=position
            )
            expression.set("position", column_position)

        return expression

    def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]:
        """Parse an ALTER TABLE ... DROP item, defaulting its kind to COLUMN."""
        drop = self._match(TokenType.DROP) and self._parse_drop()
        if drop and not isinstance(drop, exp.Command):
            drop.set("kind", drop.args.get("kind", "COLUMN"))
        return drop

    # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html
    def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition:
        """Parse ALTER TABLE ... DROP [IF EXISTS] PARTITION (...), ..."""
        return self.expression(
            exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists
        )

    def _parse_add_constraint(self) -> exp.AddConstraint:
        """Parse an ADD CONSTRAINT / FOREIGN KEY / PRIMARY KEY / CHECK clause.

        The triggering token was already consumed by the caller (self._prev).
        """
        this = None
        kind = self._prev.token_type

        if kind == TokenType.CONSTRAINT:
            this = self._parse_id_var()

            if self._match_text_seq("CHECK"):
                expression = self._parse_wrapped(self._parse_conjunction)
                enforced = self._match_text_seq("ENFORCED")

                return self.expression(
                    exp.AddConstraint, this=this, expression=expression, enforced=enforced
                )

        if kind == TokenType.FOREIGN_KEY or self._match(TokenType.FOREIGN_KEY):
            expression = self._parse_foreign_key()
        elif kind == TokenType.PRIMARY_KEY or self._match(TokenType.PRIMARY_KEY):
            expression = self._parse_primary_key()
        else:
            expression = None

        return self.expression(exp.AddConstraint, this=this, expression=expression)

    def _parse_alter_table_add(self) -> t.List[t.Optional[exp.Expression]]:
        """Dispatch ALTER TABLE ... ADD to constraint or column parsing."""
        index = self._index - 1

        if self._match_set(self.ADD_CONSTRAINT_TOKENS):
            return self._parse_csv(self._parse_add_constraint)

        # Not a constraint: rewind and parse as ADD COLUMN(s).
        self._retreat(index)
        return self._parse_csv(self._parse_add_column)

    def _parse_alter_table_alter(self) -> exp.AlterColumn:
        """Parse ALTER TABLE ... ALTER [COLUMN] <col> {DROP DEFAULT | SET DEFAULT
        <expr> | [SET DATA] TYPE <type> [COLLATE ...] [USING ...]}."""
        self._match(TokenType.COLUMN)
        column = self._parse_field(any_token=True)

        if self._match_pair(TokenType.DROP, TokenType.DEFAULT):
            return self.expression(exp.AlterColumn, this=column, drop=True)
        if self._match_pair(TokenType.SET, TokenType.DEFAULT):
            return self.expression(exp.AlterColumn, this=column, default=self._parse_conjunction())

        self._match_text_seq("SET", "DATA")
        return self.expression(
            exp.AlterColumn,
            this=column,
            dtype=self._match_text_seq("TYPE") and self._parse_types(),
            collate=self._match(TokenType.COLLATE) and self._parse_term(),
            using=self._match(TokenType.USING) and self._parse_conjunction(),
        )

    def _parse_alter_table_drop(self) -> t.List[t.Optional[exp.Expression]]:
        """Dispatch ALTER TABLE ... DROP to partition or column parsing."""
        index = self._index - 1

        partition_exists = self._parse_exists()
        if self._match(TokenType.PARTITION, advance=False):
            return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists))

        # Not a partition drop: rewind and parse as DROP COLUMN(s).
        self._retreat(index)
        return self._parse_csv(self._parse_drop_column)

    def _parse_alter_table_rename(self) -> exp.RenameTable:
        """Parse ALTER TABLE ... RENAME [TO] <table>."""
        self._match_text_seq("TO")
        return self.expression(exp.RenameTable, this=self._parse_table(schema=True))

    def _parse_alter(self) -> exp.AlterTable | exp.Command:
        """Parse an ALTER statement; anything but a fully-parsed ALTER TABLE
        falls back to a raw exp.Command."""
        start = self._prev

        if not self._match(TokenType.TABLE):
            return self._parse_as_command(start)

        exists = self._parse_exists()
        this = self._parse_table(schema=True)

        if self._next:
            self._advance()
        parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None

        if parser:
            actions = ensure_list(parser(self))

            # Only commit to AlterTable if all tokens were consumed.
            if not self._curr:
                return self.expression(
                    exp.AlterTable,
                    this=this,
                    exists=exists,
                    actions=actions,
                )
        return self._parse_as_command(start)

    def _parse_merge(self) -> exp.Merge:
        """Parse MERGE INTO <target> USING <source> ON <cond> WHEN ... clauses."""
        self._match(TokenType.INTO)
        target =
self._parse_table()

        self._match(TokenType.USING)
        using = self._parse_table()

        self._match(TokenType.ON)
        on = self._parse_conjunction()

        whens = []
        while self._match(TokenType.WHEN):
            # WHEN [NOT] MATCHED [BY TARGET|BY SOURCE] [AND cond] THEN action
            matched = not self._match(TokenType.NOT)
            self._match_text_seq("MATCHED")
            source = (
                False
                if self._match_text_seq("BY", "TARGET")
                else self._match_text_seq("BY", "SOURCE")
            )
            condition = self._parse_conjunction() if self._match(TokenType.AND) else None

            self._match(TokenType.THEN)

            if self._match(TokenType.INSERT):
                _this = self._parse_star()
                if _this:
                    # INSERT * (shorthand for inserting all source columns).
                    then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=_this)
                else:
                    # INSERT (cols) VALUES (exprs)
                    then = self.expression(
                        exp.Insert,
                        this=self._parse_value(),
                        expression=self._match(TokenType.VALUES) and self._parse_value(),
                    )
            elif self._match(TokenType.UPDATE):
                expressions = self._parse_star()
                if expressions:
                    # UPDATE * (shorthand for updating all columns).
                    then = self.expression(exp.Update, expressions=expressions)
                else:
                    # UPDATE SET col = expr, ...
                    then = self.expression(
                        exp.Update,
                        expressions=self._match(TokenType.SET)
                        and self._parse_csv(self._parse_equality),
                    )
            elif self._match(TokenType.DELETE):
                then = self.expression(exp.Var, this=self._prev.text)
            else:
                then = None

            whens.append(
                self.expression(
                    exp.When,
                    matched=matched,
                    source=source,
                    condition=condition,
                    then=then,
                )
            )

        return self.expression(
            exp.Merge,
            this=target,
            using=using,
            on=on,
            expressions=whens,
        )

    def _parse_show(self) -> t.Optional[exp.Expression]:
        """Parse a SHOW statement via SHOW_PARSERS, else a generic exp.Show."""
        parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE)
        if parser:
            return parser(self)
        self._advance()
        return self.expression(exp.Show, this=self._prev.text.upper())

    def _parse_set_item_assignment(
        self, kind: t.Optional[str] = None
    ) -> t.Optional[exp.Expression]:
        """Parse one SET item of the form ``name = value`` or ``name TO value``.

        Returns None (with the token position restored) when no assignment
        operator follows the left-hand side.
        """
        index = self._index

        if kind in {"GLOBAL", "SESSION"} and self._match_text_seq("TRANSACTION"):
            return self._parse_set_transaction(global_=kind == "GLOBAL")

        left = self._parse_primary() or self._parse_id_var()

        if not self._match_texts(("=", "TO")):
            self._retreat(index)
            return None

        right = self._parse_statement() or self._parse_id_var()
        this = self.expression(exp.EQ, this=left, expression=right)

        return self.expression(exp.SetItem, this=this, kind=kind)

    def _parse_set_transaction(self, global_: bool = False) -> exp.Expression:
        """Parse SET [GLOBAL|SESSION] TRANSACTION <characteristic, ...>."""
        self._match_text_seq("TRANSACTION")
        characteristics = self._parse_csv(
            lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS)
        )
        return self.expression(
            exp.SetItem,
            expressions=characteristics,
            kind="TRANSACTION",
            **{"global": global_},  # type: ignore
        )

    def _parse_set_item(self) -> t.Optional[exp.Expression]:
        """Parse one SET item via SET_PARSERS, else as a plain assignment."""
        parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE)
        return parser(self) if parser else self._parse_set_item_assignment(kind=None)

    def _parse_set(self) -> exp.Set | exp.Command:
        """Parse a SET statement; fall back to a raw exp.Command if any tokens
        remain unconsumed after parsing the item list."""
        index = self._index
        set_ = self.expression(exp.Set, expressions=self._parse_csv(self._parse_set_item))

        if self._curr:
            self._retreat(index)
            return self._parse_as_command(self._prev)

        return set_

    def _parse_var_from_options(self, options: t.Collection[str]) -> t.Optional[exp.Var]:
        """Match one of ``options`` (each possibly multi-word) as an exp.Var."""
        for option in options:
            if self._match_text_seq(*option.split(" ")):
                return exp.var(option)
        return None

    def _parse_as_command(self, start: Token) -> exp.Command:
        """Consume all remaining tokens and wrap the raw SQL text, from ``start``
        onward, into an opaque exp.Command (first word as `this`)."""
        while self._curr:
            self._advance()
        text = self._find_sql(start, self._prev)
        size = len(start.text)
        return exp.Command(this=text[:size], expression=text[size:])

    def _parse_dict_property(self, this: str) -> exp.DictProperty:
        """Parse a dictionary property: NAME(kind [(key value ...)]).

        Used for ClickHouse-style dictionary definitions (e.g. LAYOUT, SOURCE).
        """
        settings = []

        self._match_l_paren()
        kind = self._parse_id_var()

        if self._match(TokenType.L_PAREN):
            # Inner settings list: bare `key value` pairs until neither parses.
            while True:
                key = self._parse_id_var()
                value = self._parse_primary()

                if not key and value is None:
                    break
                settings.append(self.expression(exp.DictSubProperty, this=key, value=value))
            self._match(TokenType.R_PAREN)

        self._match_r_paren()

        return self.expression(
            exp.DictProperty,
            this=this,
            kind=kind.this if kind else None,
            settings=settings,
        )

    def _parse_dict_range(self, this: str) -> exp.DictRange:
        """Parse a dictionary RANGE property: NAME(MIN x MAX y) or NAME(y).

        Without MIN, the minimum defaults to 0.
        """
        # NOTE(review): the locals `min` and `max` shadow the builtins; consider
        # renaming (behavior is unaffected here).
        self._match_l_paren()
        has_min = self._match_text_seq("MIN")
        if has_min:
            min = self._parse_var() or self._parse_primary()
            self._match_text_seq("MAX")
            max = self._parse_var() or self._parse_primary()
        else:
            max = self._parse_var() or self._parse_primary()
            min = exp.Literal.number(0)
        self._match_r_paren()
        return self.expression(exp.DictRange, this=this, min=min, max=max)

    def _find_parser(
        self, parsers: t.Dict[str, t.Callable], trie: t.Dict
    ) -> t.Optional[t.Callable]:
        """Greedily match upcoming tokens against a keyword trie and return the
        corresponding parser, or None (restoring the position) on no match."""
        if not self._curr:
            return None

        index = self._index
        this = []
        while True:
            # The current token might be multiple words
            curr = self._curr.text.upper()
            key = curr.split(" ")
            this.append(curr)
            self._advance()
            result, trie = in_trie(trie, key)
            if result == 0:
                # Dead end: no parser matches this keyword sequence.
                break
            if result == 2:
                # Full match: the accumulated words name a registered parser.
                subparser = parsers[" ".join(this)]
                return subparser
        self._retreat(index)
        return None

    def _match(self, token_type, advance=True, expression=None):
        """Match the current token against ``token_type``; on success optionally
        advance and attach pending comments to ``expression``."""
        if not self._curr:
            return None

        if self._curr.token_type == token_type:
            if advance:
                self._advance()
            self._add_comments(expression)
            return True

        return None

    def _match_set(self, types, advance=True):
        """Match the current token against a set of token types."""
        if not self._curr:
            return None

        if self._curr.token_type in types:
            if advance:
                self._advance()
            return True

        return None

    def _match_pair(self, token_type_a, token_type_b, advance=True):
        """Match the next two tokens against the given pair of token types."""
        if not self._curr or not self._next:
            return None

        if self._curr.token_type == token_type_a and self._next.token_type == token_type_b:
            if advance:
                self._advance(2)
            return True

        return None

    def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None:
        """Require an opening paren, raising a parse error if absent."""
        if not self._match(TokenType.L_PAREN, expression=expression):
            self.raise_error("Expecting (")

    def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None:
        """Require a closing paren, raising a parse error if absent."""
        if not self._match(TokenType.R_PAREN, expression=expression):
            self.raise_error("Expecting )")

    def _match_texts(self, texts, advance=True):
        """Match the current token's (upper-cased) text against a set of strings."""
        if self._curr and self._curr.text.upper() in texts:
            if advance:
                self._advance()
            return True
        return False

    def _match_text_seq(self, *texts, advance=True):
        """Match a consecutive sequence of token texts (case-insensitive).

        On any mismatch the position is fully restored; with ``advance=False``
        it is restored even on success (pure lookahead).
        """
        index = self._index
        for text in texts:
            if self._curr and self._curr.text.upper() == text:
                self._advance()
            else:
                self._retreat(index)
                return False

        if not advance:
            self._retreat(index)

        return True

    @t.overload
    def _replace_columns_with_dots(self, this: exp.Expression) -> exp.Expression:
        ...

    @t.overload
    def _replace_columns_with_dots(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        ...

    def _replace_columns_with_dots(self, this):
        """Recursively rewrite Column/Identifier nodes into Dot/Var chains.

        Used where dotted names must be treated as plain paths rather than
        table-qualified column references.
        """
        if isinstance(this, exp.Dot):
            exp.replace_children(this, self._replace_columns_with_dots)
        elif isinstance(this, exp.Column):
            exp.replace_children(this, self._replace_columns_with_dots)
            table = this.args.get("table")
            this = (
                self.expression(exp.Dot, this=table, expression=this.this)
                if table
                else self.expression(exp.Var, this=this.name)
            )
        elif isinstance(this, exp.Identifier):
            this = self.expression(exp.Var, this=this.name)

        return this

    def _replace_lambda(
        self, node: t.Optional[exp.Expression], lambda_variables: t.Set[str]
    ) -> t.Optional[exp.Expression]:
        """Rewrite column references to lambda parameters inside a lambda body.

        Columns whose first part names a lambda variable are replaced with the
        bare identifier (or a Dot chain for qualified references), so they are
        not later resolved as table columns.
        """
        if not node:
            return node

        for column in node.find_all(exp.Column):
            if column.parts[0].name in lambda_variables:
                dot_or_id = column.to_dot() if column.table else column.this
                parent = column.parent

                while isinstance(parent, exp.Dot):
                    if not isinstance(parent.parent, exp.Dot):
                        # Replace the outermost Dot that wraps this column.
                        parent.replace(dot_or_id)
                        break
                    parent = parent.parent
                else:
                    if column is node:
                        # The whole lambda body is this single column.
                        node = dot_or_id
                    else:
                        column.replace(dot_or_id)
        return node
def parse_var_map(args: t.List) -> exp.StarMap | exp.VarMap:
    """Build a VAR_MAP expression from a flat [key, value, key, value, ...] list.

    A single star argument (e.g. ``MAP(*)``) yields a StarMap instead of a
    key/value map.
    """
    if len(args) == 1 and args[0].is_star:
        return exp.StarMap(this=args[0])

    # Even positions are keys, odd positions are the corresponding values.
    even_indices = range(0, len(args), 2)
    keys = [args[index] for index in even_indices]
    values = [args[index + 1] for index in even_indices]

    return exp.VarMap(
        keys=exp.Array(expressions=keys),
        values=exp.Array(expressions=values),
    )
60class Parser(metaclass=_Parser): 61 """ 62 Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree. 63 64 Args: 65 error_level: The desired error level. 66 Default: ErrorLevel.IMMEDIATE 67 error_message_context: Determines the amount of context to capture from a 68 query string when displaying the error message (in number of characters). 69 Default: 100 70 max_errors: Maximum number of error messages to include in a raised ParseError. 71 This is only relevant if error_level is ErrorLevel.RAISE. 72 Default: 3 73 """ 74 75 FUNCTIONS: t.Dict[str, t.Callable] = { 76 **{name: f.from_arg_list for f in exp.ALL_FUNCTIONS for name in f.sql_names()}, 77 "DATE_TO_DATE_STR": lambda args: exp.Cast( 78 this=seq_get(args, 0), 79 to=exp.DataType(this=exp.DataType.Type.TEXT), 80 ), 81 "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)), 82 "LIKE": parse_like, 83 "TIME_TO_TIME_STR": lambda args: exp.Cast( 84 this=seq_get(args, 0), 85 to=exp.DataType(this=exp.DataType.Type.TEXT), 86 ), 87 "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring( 88 this=exp.Cast( 89 this=seq_get(args, 0), 90 to=exp.DataType(this=exp.DataType.Type.TEXT), 91 ), 92 start=exp.Literal.number(1), 93 length=exp.Literal.number(10), 94 ), 95 "VAR_MAP": parse_var_map, 96 } 97 98 NO_PAREN_FUNCTIONS = { 99 TokenType.CURRENT_DATE: exp.CurrentDate, 100 TokenType.CURRENT_DATETIME: exp.CurrentDate, 101 TokenType.CURRENT_TIME: exp.CurrentTime, 102 TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp, 103 TokenType.CURRENT_USER: exp.CurrentUser, 104 } 105 106 NESTED_TYPE_TOKENS = { 107 TokenType.ARRAY, 108 TokenType.MAP, 109 TokenType.NULLABLE, 110 TokenType.STRUCT, 111 } 112 113 TYPE_TOKENS = { 114 TokenType.BIT, 115 TokenType.BOOLEAN, 116 TokenType.TINYINT, 117 TokenType.UTINYINT, 118 TokenType.SMALLINT, 119 TokenType.USMALLINT, 120 TokenType.INT, 121 TokenType.UINT, 122 TokenType.BIGINT, 123 TokenType.UBIGINT, 124 TokenType.INT128, 125 TokenType.UINT128, 
126 TokenType.INT256, 127 TokenType.UINT256, 128 TokenType.FLOAT, 129 TokenType.DOUBLE, 130 TokenType.CHAR, 131 TokenType.NCHAR, 132 TokenType.VARCHAR, 133 TokenType.NVARCHAR, 134 TokenType.TEXT, 135 TokenType.MEDIUMTEXT, 136 TokenType.LONGTEXT, 137 TokenType.MEDIUMBLOB, 138 TokenType.LONGBLOB, 139 TokenType.BINARY, 140 TokenType.VARBINARY, 141 TokenType.JSON, 142 TokenType.JSONB, 143 TokenType.INTERVAL, 144 TokenType.TIME, 145 TokenType.TIMESTAMP, 146 TokenType.TIMESTAMPTZ, 147 TokenType.TIMESTAMPLTZ, 148 TokenType.DATETIME, 149 TokenType.DATETIME64, 150 TokenType.DATE, 151 TokenType.INT4RANGE, 152 TokenType.INT4MULTIRANGE, 153 TokenType.INT8RANGE, 154 TokenType.INT8MULTIRANGE, 155 TokenType.NUMRANGE, 156 TokenType.NUMMULTIRANGE, 157 TokenType.TSRANGE, 158 TokenType.TSMULTIRANGE, 159 TokenType.TSTZRANGE, 160 TokenType.TSTZMULTIRANGE, 161 TokenType.DATERANGE, 162 TokenType.DATEMULTIRANGE, 163 TokenType.DECIMAL, 164 TokenType.BIGDECIMAL, 165 TokenType.UUID, 166 TokenType.GEOGRAPHY, 167 TokenType.GEOMETRY, 168 TokenType.HLLSKETCH, 169 TokenType.HSTORE, 170 TokenType.PSEUDO_TYPE, 171 TokenType.SUPER, 172 TokenType.SERIAL, 173 TokenType.SMALLSERIAL, 174 TokenType.BIGSERIAL, 175 TokenType.XML, 176 TokenType.UNIQUEIDENTIFIER, 177 TokenType.MONEY, 178 TokenType.SMALLMONEY, 179 TokenType.ROWVERSION, 180 TokenType.IMAGE, 181 TokenType.VARIANT, 182 TokenType.OBJECT, 183 TokenType.INET, 184 *NESTED_TYPE_TOKENS, 185 } 186 187 SUBQUERY_PREDICATES = { 188 TokenType.ANY: exp.Any, 189 TokenType.ALL: exp.All, 190 TokenType.EXISTS: exp.Exists, 191 TokenType.SOME: exp.Any, 192 } 193 194 RESERVED_KEYWORDS = { 195 *Tokenizer.SINGLE_TOKENS.values(), 196 TokenType.SELECT, 197 } 198 199 DB_CREATABLES = { 200 TokenType.DATABASE, 201 TokenType.SCHEMA, 202 TokenType.TABLE, 203 TokenType.VIEW, 204 TokenType.DICTIONARY, 205 } 206 207 CREATABLES = { 208 TokenType.COLUMN, 209 TokenType.FUNCTION, 210 TokenType.INDEX, 211 TokenType.PROCEDURE, 212 *DB_CREATABLES, 213 } 214 215 # Tokens that can 
represent identifiers 216 ID_VAR_TOKENS = { 217 TokenType.VAR, 218 TokenType.ANTI, 219 TokenType.APPLY, 220 TokenType.ASC, 221 TokenType.AUTO_INCREMENT, 222 TokenType.BEGIN, 223 TokenType.CACHE, 224 TokenType.CASE, 225 TokenType.COLLATE, 226 TokenType.COMMAND, 227 TokenType.COMMENT, 228 TokenType.COMMIT, 229 TokenType.CONSTRAINT, 230 TokenType.DEFAULT, 231 TokenType.DELETE, 232 TokenType.DESC, 233 TokenType.DESCRIBE, 234 TokenType.DICTIONARY, 235 TokenType.DIV, 236 TokenType.END, 237 TokenType.EXECUTE, 238 TokenType.ESCAPE, 239 TokenType.FALSE, 240 TokenType.FIRST, 241 TokenType.FILTER, 242 TokenType.FORMAT, 243 TokenType.FULL, 244 TokenType.IF, 245 TokenType.IS, 246 TokenType.ISNULL, 247 TokenType.INTERVAL, 248 TokenType.KEEP, 249 TokenType.LEFT, 250 TokenType.LOAD, 251 TokenType.MERGE, 252 TokenType.NATURAL, 253 TokenType.NEXT, 254 TokenType.OFFSET, 255 TokenType.ORDINALITY, 256 TokenType.OVERWRITE, 257 TokenType.PARTITION, 258 TokenType.PERCENT, 259 TokenType.PIVOT, 260 TokenType.PRAGMA, 261 TokenType.RANGE, 262 TokenType.REFERENCES, 263 TokenType.RIGHT, 264 TokenType.ROW, 265 TokenType.ROWS, 266 TokenType.SEMI, 267 TokenType.SET, 268 TokenType.SETTINGS, 269 TokenType.SHOW, 270 TokenType.TEMPORARY, 271 TokenType.TOP, 272 TokenType.TRUE, 273 TokenType.UNIQUE, 274 TokenType.UNPIVOT, 275 TokenType.UPDATE, 276 TokenType.VOLATILE, 277 TokenType.WINDOW, 278 *CREATABLES, 279 *SUBQUERY_PREDICATES, 280 *TYPE_TOKENS, 281 *NO_PAREN_FUNCTIONS, 282 } 283 284 INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END} 285 286 TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - { 287 TokenType.APPLY, 288 TokenType.ASOF, 289 TokenType.FULL, 290 TokenType.LEFT, 291 TokenType.LOCK, 292 TokenType.NATURAL, 293 TokenType.OFFSET, 294 TokenType.RIGHT, 295 TokenType.WINDOW, 296 } 297 298 COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS} 299 300 UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET} 301 302 TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"} 303 304 FUNC_TOKENS = { 305 
        # (continuation of a token set opened earlier in the class body —
        # its opening lines are above this chunk; do not re-open it here)
        TokenType.COMMAND,
        TokenType.CURRENT_DATE,
        TokenType.CURRENT_DATETIME,
        TokenType.CURRENT_TIMESTAMP,
        TokenType.CURRENT_TIME,
        TokenType.CURRENT_USER,
        TokenType.FILTER,
        TokenType.FIRST,
        TokenType.FORMAT,
        TokenType.GLOB,
        TokenType.IDENTIFIER,
        TokenType.INDEX,
        TokenType.ISNULL,
        TokenType.ILIKE,
        TokenType.LIKE,
        TokenType.MERGE,
        TokenType.OFFSET,
        TokenType.PRIMARY_KEY,
        TokenType.RANGE,
        TokenType.REPLACE,
        TokenType.ROW,
        TokenType.UNNEST,
        TokenType.VAR,
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.DATE,
        TokenType.DATETIME,
        TokenType.TABLE,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.WINDOW,
        *TYPE_TOKENS,
        *SUBQUERY_PREDICATES,
    }

    # Logical connectives -> AST node classes.
    CONJUNCTION = {
        TokenType.AND: exp.And,
        TokenType.OR: exp.Or,
    }

    # Equality operators -> AST node classes.
    EQUALITY = {
        TokenType.EQ: exp.EQ,
        TokenType.NEQ: exp.NEQ,
        TokenType.NULLSAFE_EQ: exp.NullSafeEQ,
    }

    # Ordering comparison operators -> AST node classes.
    COMPARISON = {
        TokenType.GT: exp.GT,
        TokenType.GTE: exp.GTE,
        TokenType.LT: exp.LT,
        TokenType.LTE: exp.LTE,
    }

    # Bitwise operators -> AST node classes (DPIPE is string concat `||`).
    BITWISE = {
        TokenType.AMP: exp.BitwiseAnd,
        TokenType.CARET: exp.BitwiseXor,
        TokenType.PIPE: exp.BitwiseOr,
        TokenType.DPIPE: exp.DPipe,
    }

    # Additive-precedence operators -> AST node classes.
    TERM = {
        TokenType.DASH: exp.Sub,
        TokenType.PLUS: exp.Add,
        TokenType.MOD: exp.Mod,
        TokenType.COLLATE: exp.Collate,
    }

    # Multiplicative-precedence operators -> AST node classes.
    FACTOR = {
        TokenType.DIV: exp.IntDiv,
        TokenType.LR_ARROW: exp.Distance,
        TokenType.SLASH: exp.Div,
        TokenType.STAR: exp.Mul,
    }

    # Tokens that denote time/timestamp-like types.
    TIMESTAMPS = {
        TokenType.TIME,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
    }

    # Tokens that start a set operation between two queries.
    SET_OPERATIONS = {
        TokenType.UNION,
        TokenType.INTERSECT,
        TokenType.EXCEPT,
    }

    # JOIN method modifiers (e.g. NATURAL JOIN, ASOF JOIN).
    JOIN_METHODS = {
        TokenType.NATURAL,
        TokenType.ASOF,
    }

    # JOIN side modifiers (LEFT/RIGHT/FULL).
    JOIN_SIDES = {
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.FULL,
    }

    # JOIN kind modifiers (INNER/OUTER/CROSS/SEMI/ANTI).
    JOIN_KINDS = {
        TokenType.INNER,
        TokenType.OUTER,
        TokenType.CROSS,
        TokenType.SEMI,
        TokenType.ANTI,
    }

    # Dialect-specific join hints; empty by default, overridden by subclasses.
    JOIN_HINTS: t.Set[str] = set()

    # Lambda-construct tokens -> parser callbacks. `->` builds an exp.Lambda,
    # `=>` builds an exp.Kwarg (named argument).
    LAMBDAS = {
        TokenType.ARROW: lambda self, expressions: self.expression(
            exp.Lambda,
            this=self._replace_lambda(
                self._parse_conjunction(),
                {node.name for node in expressions},
            ),
            expressions=expressions,
        ),
        TokenType.FARROW: lambda self, expressions: self.expression(
            exp.Kwarg,
            this=exp.var(expressions[0].name),
            expression=self._parse_conjunction(),
        ),
    }

    # Postfix column operators (cast `::`, JSON extraction arrows, etc.).
    # DOT is handled specially by the column parser, hence the None callback.
    COLUMN_OPERATORS = {
        TokenType.DOT: None,
        TokenType.DCOLON: lambda self, this, to: self.expression(
            exp.Cast if self.STRICT_CAST else exp.TryCast,
            this=this,
            to=to,
        ),
        TokenType.ARROW: lambda self, this, path: self.expression(
            exp.JSONExtract,
            this=this,
            expression=path,
        ),
        TokenType.DARROW: lambda self, this, path: self.expression(
            exp.JSONExtractScalar,
            this=this,
            expression=path,
        ),
        TokenType.HASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtract,
            this=this,
            expression=path,
        ),
        TokenType.DHASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtractScalar,
            this=this,
            expression=path,
        ),
        TokenType.PLACEHOLDER: lambda self, this, key: self.expression(
            exp.JSONBContains,
            this=this,
            expression=key,
        ),
    }

    # Target Expression type -> parser entry point; used by parse_into().
    EXPRESSION_PARSERS = {
        exp.Cluster: lambda self: self._parse_sort(exp.Cluster, "CLUSTER", "BY"),
        exp.Column: lambda self: self._parse_column(),
        exp.Condition: lambda self: self._parse_conjunction(),
        exp.DataType: lambda self: self._parse_types(),
        exp.Expression: lambda self: self._parse_statement(),
        exp.From: lambda self: self._parse_from(),
        exp.Group: lambda self: self._parse_group(),
        exp.Having: lambda self: self._parse_having(),
        exp.Identifier: lambda self: self._parse_id_var(),
        exp.Join: lambda self: self._parse_join(),
        exp.Lambda: lambda self: self._parse_lambda(),
        exp.Lateral: lambda self: self._parse_lateral(),
        exp.Limit: lambda self: self._parse_limit(),
        exp.Offset: lambda self: self._parse_offset(),
        exp.Order: lambda self: self._parse_order(),
        exp.Ordered: lambda self: self._parse_ordered(),
        exp.Properties: lambda self: self._parse_properties(),
        exp.Qualify: lambda self: self._parse_qualify(),
        exp.Returning: lambda self: self._parse_returning(),
        exp.Sort: lambda self: self._parse_sort(exp.Sort, "SORT", "BY"),
        exp.Table: lambda self: self._parse_table_parts(),
        exp.TableAlias: lambda self: self._parse_table_alias(),
        exp.Where: lambda self: self._parse_where(),
        exp.Window: lambda self: self._parse_named_window(),
        exp.With: lambda self: self._parse_with(),
        "JOIN_TYPE": lambda self: self._parse_join_parts(),
    }

    # Leading statement token -> parser for the whole statement.
    STATEMENT_PARSERS = {
        TokenType.ALTER: lambda self: self._parse_alter(),
        TokenType.BEGIN: lambda self: self._parse_transaction(),
        TokenType.CACHE: lambda self: self._parse_cache(),
        TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(),
        TokenType.COMMENT: lambda self: self._parse_comment(),
        TokenType.CREATE: lambda self: self._parse_create(),
        TokenType.DELETE: lambda self: self._parse_delete(),
        TokenType.DESC: lambda self: self._parse_describe(),
        TokenType.DESCRIBE: lambda self: self._parse_describe(),
        TokenType.DROP: lambda self: self._parse_drop(),
        TokenType.END: lambda self: self._parse_commit_or_rollback(),
        # A bare FROM is normalized into SELECT * FROM ...
        TokenType.FROM: lambda self: exp.select("*").from_(
            t.cast(exp.From, self._parse_from(skip_from_token=True))
        ),
        TokenType.INSERT: lambda self: self._parse_insert(),
        TokenType.LOAD: lambda self: self._parse_load(),
        TokenType.MERGE: lambda self: self._parse_merge(),
        TokenType.PIVOT: lambda self: self._parse_simplified_pivot(),
        TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()),
        TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(),
        TokenType.SET: lambda self: self._parse_set(),
        TokenType.UNCACHE: lambda self: self._parse_uncache(),
        TokenType.UPDATE: lambda self: self._parse_update(),
        TokenType.USE: lambda self: self.expression(
            exp.Use,
            kind=self._match_texts(("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA"))
            and exp.var(self._prev.text),
            this=self._parse_table(schema=False),
        ),
    }

    # Prefix unary operators -> parser callbacks.
    UNARY_PARSERS = {
        TokenType.PLUS: lambda self: self._parse_unary(),  # Unary + is handled as a no-op
        TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()),
        TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()),
        TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()),
    }

    # Literal/primary tokens -> parser callbacks; each receives the matched token.
    PRIMARY_PARSERS = {
        TokenType.STRING: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=True
        ),
        TokenType.NUMBER: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=False
        ),
        TokenType.STAR: lambda self, _: self.expression(
            exp.Star,
            **{"except": self._parse_except(), "replace": self._parse_replace()},
        ),
        TokenType.NULL: lambda self, _: self.expression(exp.Null),
        TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True),
        TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False),
        TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text),
        TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text),
        TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text),
        TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token),
        TokenType.NATIONAL_STRING: lambda self, token: self.expression(
            exp.National, this=token.text
        ),
        TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text),
        TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(),
    }

    # Bind-parameter/placeholder tokens -> parser callbacks.
    PLACEHOLDER_PARSERS = {
        TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder),
        TokenType.PARAMETER: lambda self: self._parse_parameter(),
        TokenType.COLON: lambda self: self.expression(exp.Placeholder, this=self._prev.text)
        if self._match_set((TokenType.NUMBER, TokenType.VAR))
        else None,
    }

    # Range/predicate operators (BETWEEN, IN, LIKE, ...) -> parser callbacks.
    RANGE_PARSERS = {
        TokenType.BETWEEN: lambda self, this: self._parse_between(this),
        TokenType.GLOB: binary_range_parser(exp.Glob),
        TokenType.ILIKE: binary_range_parser(exp.ILike),
        TokenType.IN: lambda self, this: self._parse_in(this),
        TokenType.IRLIKE: binary_range_parser(exp.RegexpILike),
        TokenType.IS: lambda self, this: self._parse_is(this),
        TokenType.LIKE: binary_range_parser(exp.Like),
        TokenType.OVERLAPS: binary_range_parser(exp.Overlaps),
        TokenType.RLIKE: binary_range_parser(exp.RegexpLike),
        TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo),
    }

    # DDL property keyword -> parser callback (keys are upper-cased text).
    PROPERTY_PARSERS: t.Dict[str, t.Callable] = {
        "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty),
        "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty),
        "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(),
        "CHARACTER SET": lambda self: self._parse_character_set(),
        "CHECKSUM": lambda self: self._parse_checksum(),
        "CLUSTER": lambda self: self._parse_cluster(),
        "COLLATE": lambda self: self._parse_property_assignment(exp.CollateProperty),
        "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty),
        "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs),
        "DEFINER": lambda self: self._parse_definer(),
        "DETERMINISTIC": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "DISTKEY": lambda self: self._parse_distkey(),
        "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty),
        "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty),
        "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty),
        "EXTERNAL": lambda self: self.expression(exp.ExternalProperty),
        "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs),
        "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "FREESPACE": lambda self: self._parse_freespace(),
        "IMMUTABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs),
        "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty),
        "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"),
        "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"),
        "LIKE": lambda self: self._parse_create_like(),
        "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty),
        "LOCK": lambda self: self._parse_locking(),
        "LOCKING": lambda self: self._parse_locking(),
        "LOG": lambda self, **kwargs: self._parse_log(**kwargs),
        "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty),
        "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs),
        "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True),
        "NO": lambda self: self._parse_no_property(),
        "ON": lambda self: self._parse_on_property(),
        "ORDER BY": lambda self: self._parse_order(skip_order_token=True),
        "PARTITION BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED_BY": lambda self: self._parse_partitioned_by(),
        "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True),
        "RANGE": lambda self: self._parse_dict_range(this="RANGE"),
        "RETURNS": lambda self: self._parse_returns(),
        "ROW": lambda self: self._parse_row(),
        "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty),
        "SET": lambda self: self.expression(exp.SetProperty, multi=False),
        "SETTINGS": lambda self: self.expression(
            exp.SettingsProperty, expressions=self._parse_csv(self._parse_set_item)
        ),
        "SORTKEY": lambda self: self._parse_sortkey(),
        "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"),
        "STABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("STABLE")
        ),
        "STORED": lambda self: self._parse_stored(),
        "TBLPROPERTIES": lambda self: self._parse_wrapped_csv(self._parse_property),
        "TEMP": lambda self: self.expression(exp.TemporaryProperty),
        "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty),
        "TO": lambda self: self._parse_to_table(),
        "TRANSIENT": lambda self: self.expression(exp.TransientProperty),
        "TTL": lambda self: self._parse_ttl(),
        "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "VOLATILE": lambda self: self._parse_volatile_property(),
        "WITH": lambda self: self._parse_with_property(),
    }

    # Column/table constraint keyword -> parser callback.
    CONSTRAINT_PARSERS = {
        "AUTOINCREMENT": lambda self: self._parse_auto_increment(),
        "AUTO_INCREMENT": lambda self: self._parse_auto_increment(),
        "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False),
        "CHARACTER SET": lambda self: self.expression(
            exp.CharacterSetColumnConstraint, this=self._parse_var_or_string()
        ),
        "CHECK": lambda self: self.expression(
            exp.CheckColumnConstraint, this=self._parse_wrapped(self._parse_conjunction)
        ),
        "COLLATE": lambda self: self.expression(
            exp.CollateColumnConstraint, this=self._parse_var()
        ),
        "COMMENT": lambda self: self.expression(
            exp.CommentColumnConstraint, this=self._parse_string()
        ),
        "COMPRESS": lambda self: self._parse_compress(),
        "DEFAULT": lambda self: self.expression(
            exp.DefaultColumnConstraint, this=self._parse_bitwise()
        ),
        "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()),
        "FOREIGN KEY": lambda self: self._parse_foreign_key(),
        "FORMAT": lambda self: self.expression(
            exp.DateFormatColumnConstraint, this=self._parse_var_or_string()
        ),
        "GENERATED": lambda self: self._parse_generated_as_identity(),
        "IDENTITY": lambda self: self._parse_auto_increment(),
        "INLINE": lambda self: self._parse_inline(),
        "LIKE": lambda self: self._parse_create_like(),
        "NOT": lambda self: self._parse_not_constraint(),
        "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True),
        "ON": lambda self: self._match(TokenType.UPDATE)
        and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function()),
        "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()),
        "PRIMARY KEY": lambda self: self._parse_primary_key(),
        "REFERENCES": lambda self: self._parse_references(match=False),
        "TITLE": lambda self: self.expression(
            exp.TitleColumnConstraint, this=self._parse_var_or_string()
        ),
        "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]),
        "UNIQUE": lambda self: self._parse_unique(),
        "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint),
    }

    # ALTER TABLE action keyword -> parser callback.
    ALTER_PARSERS = {
        "ADD": lambda self: self._parse_alter_table_add(),
        "ALTER": lambda self: self._parse_alter_table_alter(),
        "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()),
        "DROP": lambda self: self._parse_alter_table_drop(),
        "RENAME": lambda self: self._parse_alter_table_rename(),
    }

    # Constraints that may appear in a schema without a preceding name.
    SCHEMA_UNNAMED_CONSTRAINTS = {"CHECK", "FOREIGN KEY", "LIKE", "PRIMARY KEY", "UNIQUE"}

    # Function-like constructs that are parsed without parentheses.
    NO_PAREN_FUNCTION_PARSERS = {
        TokenType.ANY: lambda self: self.expression(exp.Any, this=self._parse_bitwise()),
        TokenType.CASE: lambda self: self._parse_case(),
        TokenType.IF: lambda self: self._parse_if(),
        TokenType.NEXT_VALUE_FOR: lambda self: self.expression(
            exp.NextValueFor,
            this=self._parse_column(),
            order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order),
        ),
    }

    # Functions whose arguments may carry aliases (e.g. STRUCT(x AS a)).
    FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"}

    # Functions with non-standard argument grammars that need dedicated parsing.
    FUNCTION_PARSERS: t.Dict[str, t.Callable] = {
        "CAST": lambda self: self._parse_cast(self.STRICT_CAST),
        "CONCAT": lambda self: self._parse_concat(),
        "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST),
        "DECODE": lambda self: self._parse_decode(),
        "EXTRACT": lambda self: self._parse_extract(),
        "JSON_OBJECT": lambda self: self._parse_json_object(),
        "LOG": lambda self: self._parse_logarithm(),
        "MATCH": lambda self: self._parse_match_against(),
        "OPENJSON": lambda self: self._parse_open_json(),
        "POSITION": lambda self: self._parse_position(),
        "SAFE_CAST": lambda self: self._parse_cast(False),
        "STRING_AGG": lambda self: self._parse_string_agg(),
        "SUBSTRING": lambda self: self._parse_substring(),
        "TRIM": lambda self: self._parse_trim(),
        "TRY_CAST": lambda self: self._parse_cast(False),
        "TRY_CONVERT": lambda self: self._parse_convert(False),
    }

    # Query-modifier arg name -> parser; keys match exp arg names set on selects.
    QUERY_MODIFIER_PARSERS = {
        "joins": lambda self: list(iter(self._parse_join, None)),
        "laterals": lambda self: list(iter(self._parse_lateral, None)),
        "match": lambda self: self._parse_match_recognize(),
        "where": lambda self: self._parse_where(),
        "group": lambda self: self._parse_group(),
        "having": lambda self: self._parse_having(),
        "qualify": lambda self: self._parse_qualify(),
        "windows": lambda self: self._parse_window_clause(),
        "order": lambda self: self._parse_order(),
        "limit": lambda self: self._parse_limit(),
        "offset": lambda self: self._parse_offset(),
        "locks": lambda self: self._parse_locks(),
        "sample": lambda self: self._parse_table_sample(as_modifier=True),
    }

    # SET statement scope keyword -> parser callback.
    SET_PARSERS = {
        "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"),
        "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"),
        "SESSION": lambda self: self._parse_set_item_assignment("SESSION"),
        "TRANSACTION": lambda self: self._parse_set_transaction(),
    }

    # SHOW statement parsers; empty by default, dialects override.
    SHOW_PARSERS: t.Dict[str, t.Callable] = {}

    # Per-type literal parsers; empty by default, dialects override.
    TYPE_LITERAL_PARSERS: t.Dict[exp.DataType.Type, t.Callable] = {}

    # Expression types that accept query modifiers (WHERE, LIMIT, ...).
    MODIFIABLES = (exp.Subquery, exp.Subqueryable, exp.Table)

    # Tokens that can start the SELECT part of a CREATE ... AS statement.
    DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN}

    # Tokens that may legally precede VOLATILE in a CREATE statement.
    PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE}

    TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"}
    TRANSACTION_CHARACTERISTICS = {
        "ISOLATION LEVEL REPEATABLE READ",
        "ISOLATION LEVEL READ COMMITTED",
        "ISOLATION LEVEL READ UNCOMMITTED",
        "ISOLATION LEVEL SERIALIZABLE",
        "READ WRITE",
        "READ ONLY",
    }

    # Conflict-resolution keywords for INSERT OR <alternative>.
    INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"}

    # Valid kinds inside a CLONE ... (KIND => expr) clause.
    CLONE_KINDS = {"TIMESTAMP", "OFFSET", "STATEMENT"}

    # ROWS is excluded so `OVER w ROWS ...` is not mistaken for an alias.
    WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS}
    WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER}
    WINDOW_SIDES = {"FOLLOWING", "PRECEDING"}

    ADD_CONSTRAINT_TOKENS = {TokenType.CONSTRAINT, TokenType.PRIMARY_KEY, TokenType.FOREIGN_KEY}

    # Whether `::`/CAST produce exp.Cast (strict) rather than exp.TryCast.
    STRICT_CAST = True

    CONCAT_NULL_OUTPUTS_STRING = False  # A NULL arg in CONCAT yields NULL by default

    CONVERT_TYPE_FIRST = False

    PREFIXED_PIVOT_COLUMNS = False
    IDENTIFY_PIVOT_STRINGS = False

    LOG_BASE_FIRST = True
    LOG_DEFAULTS_TO_LN = False

    __slots__ = (
        "error_level",
        "error_message_context",
        "max_errors",
        "sql",
        "errors",
        "_tokens",
        "_index",
        "_curr",
        "_next",
        "_prev",
        "_prev_comments",
    )
    # Autofilled — presumably overridden per dialect by generator machinery
    # elsewhere in the package; confirm before relying on these defaults.
    INDEX_OFFSET: int = 0
    UNNEST_COLUMN_ONLY: bool = False
    ALIAS_POST_TABLESAMPLE: bool = False
    STRICT_STRING_CONCAT = False
    NULL_ORDERING: str = "nulls_are_small"
    SHOW_TRIE: t.Dict = {}
    SET_TRIE: t.Dict = {}
    FORMAT_MAPPING: t.Dict[str, str] = {}
    FORMAT_TRIE: t.Dict = {}
    TIME_MAPPING: t.Dict[str, str] = {}
    TIME_TRIE: t.Dict = {}

    def __init__(
        self,
        error_level: t.Optional[ErrorLevel] = None,
        error_message_context: int = 100,
        max_errors: int = 3,
    ):
        self.error_level = error_level or ErrorLevel.IMMEDIATE
        self.error_message_context = error_message_context
        self.max_errors = max_errors
        self.reset()

    def reset(self):
        """Resets the parser's mutable state so the instance can be reused."""
        self.sql = ""
        self.errors = []
        self._tokens = []
        self._index = 0
        self._curr = None
        self._next = None
        self._prev = None
        self._prev_comments = None

    def parse(
        self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens and returns a list of syntax trees, one tree
        per parsed SQL statement.

        Args:
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The list of the produced syntax trees.
        """
        return self._parse(
            parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
        )

    def parse_into(
        self,
        expression_types: exp.IntoType,
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens into a given Expression type. If a collection of Expression
        types is given instead, this method will try to parse the token list into each one
        of them, stopping at the first for which the parsing succeeds.

        Args:
            expression_types: The expression type(s) to try and parse the token list into.
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The target Expression.
        """
        errors = []
        for expression_type in ensure_list(expression_types):
            parser = self.EXPRESSION_PARSERS.get(expression_type)
            if not parser:
                raise TypeError(f"No parser registered for {expression_type}")

            try:
                return self._parse(parser, raw_tokens, sql)
            except ParseError as e:
                e.errors[0]["into_expression"] = expression_type
                errors.append(e)

        # Every candidate type failed; surface all collected errors, chained
        # to the last failure for a useful traceback.
        raise ParseError(
            f"Failed to parse '{sql or raw_tokens}' into {expression_types}",
            errors=merge_errors(errors),
        ) from errors[-1]

    def _parse(
        self,
        parse_method: t.Callable[[Parser], t.Optional[exp.Expression]],
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        """Splits the token stream on semicolons and runs `parse_method` on each chunk."""
        self.reset()
        self.sql = sql or ""

        total = len(raw_tokens)
        chunks: t.List[t.List[Token]] = [[]]

        for i, token in enumerate(raw_tokens):
            if token.token_type == TokenType.SEMICOLON:
                # A trailing semicolon should not produce an empty statement.
                if i < total - 1:
                    chunks.append([])
            else:
                chunks[-1].append(token)

        expressions = []

        for tokens in chunks:
            self._index = -1
            self._tokens = tokens
            self._advance()

            expressions.append(parse_method(self))

            # Leftover tokens mean the statement wasn't fully consumed.
            if self._index < len(self._tokens):
                self.raise_error("Invalid expression / Unexpected token")

            self.check_errors()

        return expressions

    def check_errors(self) -> None:
        """Logs or raises any found errors, depending on the chosen error level setting."""
        if self.error_level == ErrorLevel.WARN:
            for error in self.errors:
                logger.error(str(error))
        elif self.error_level == ErrorLevel.RAISE and self.errors:
            raise ParseError(
                concat_messages(self.errors, self.max_errors),
                errors=merge_errors(self.errors),
            )

    def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
        """
        Appends an error in the list of recorded errors or raises it, depending on the chosen
        error level setting.
        """
        token = token or self._curr or self._prev or Token.string("")
        start = token.start
        end = token.end + 1
        start_context = self.sql[max(start - self.error_message_context, 0) : start]
        highlight = self.sql[start:end]
        end_context = self.sql[end : end + self.error_message_context]

        error = ParseError.new(
            # The escapes underline the offending SQL fragment (ANSI SGR codes).
            f"{message}. Line {token.line}, Col: {token.col}.\n"
            f"  {start_context}\033[4m{highlight}\033[0m{end_context}",
            description=message,
            line=token.line,
            col=token.col,
            start_context=start_context,
            highlight=highlight,
            end_context=end_context,
        )

        if self.error_level == ErrorLevel.IMMEDIATE:
            raise error

        self.errors.append(error)

    def expression(
        self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs
    ) -> E:
        """
        Creates a new, validated Expression.

        Args:
            exp_class: The expression class to instantiate.
            comments: An optional list of comments to attach to the expression.
            kwargs: The arguments to set for the expression along with their respective values.

        Returns:
            The target expression.
        """
        instance = exp_class(**kwargs)
        # Without explicit comments, attach (and consume) any pending token comments.
        instance.add_comments(comments) if comments else self._add_comments(instance)
        return self.validate_expression(instance)

    def _add_comments(self, expression: t.Optional[exp.Expression]) -> None:
        # Moves the comments of the previously consumed token onto `expression`.
        if expression and self._prev_comments:
            expression.add_comments(self._prev_comments)
            self._prev_comments = None

    def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E:
        """
        Validates an Expression, making sure that all its mandatory arguments are set.

        Args:
            expression: The expression to validate.
            args: An optional list of items that was used to instantiate the expression, if it's a Func.

        Returns:
            The validated expression.
        """
        if self.error_level != ErrorLevel.IGNORE:
            for error_message in expression.error_messages(args):
                self.raise_error(error_message)

        return expression

    def _find_sql(self, start: Token, end: Token) -> str:
        # Returns the raw SQL slice spanning the two tokens, inclusive.
        return self.sql[start.start : end.end + 1]

    def _advance(self, times: int = 1) -> None:
        # Moves the token cursor forward and refreshes _curr/_next/_prev views.
        self._index += times
        self._curr = seq_get(self._tokens, self._index)
        self._next = seq_get(self._tokens, self._index + 1)

        if self._index > 0:
            self._prev = self._tokens[self._index - 1]
            self._prev_comments = self._prev.comments
        else:
            self._prev = None
            self._prev_comments = None

    def _retreat(self, index: int) -> None:
        # Moves the cursor back to `index` (implemented as a negative advance).
        if index != self._index:
            self._advance(index - self._index)

    def _parse_command(self) -> exp.Command:
        # Fallback: wrap an unparsed statement verbatim in an exp.Command node.
        return self.expression(exp.Command, this=self._prev.text, expression=self._parse_string())

    def _parse_comment(self, allow_exists: bool = True) -> exp.Expression:
        """Parses COMMENT [IF EXISTS] ON <kind> <target> IS <string>."""
        start = self._prev
        exists = self._parse_exists() if allow_exists else None

        self._match(TokenType.ON)

        kind = self._match_set(self.CREATABLES) and self._prev
        if not kind:
            # Unknown target kind: fall back to an opaque command.
            return self._parse_as_command(start)

        if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=kind.token_type)
        elif kind.token_type == TokenType.TABLE:
            this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS)
        elif kind.token_type == TokenType.COLUMN:
            this = self._parse_column()
        else:
            this = self._parse_id_var()

        self._match(TokenType.IS)

        return self.expression(
            exp.Comment, this=this, kind=kind.text, expression=self._parse_string(), exists=exists
        )

    def _parse_to_table(
        self,
    ) -> exp.ToTableProperty:
        # Parses the TO <table> property (e.g. for materialized views).
        table = self._parse_table_parts(schema=True)
        return self.expression(exp.ToTableProperty, this=table)

    # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl
    def _parse_ttl(self) -> exp.Expression:
        def _parse_ttl_action() -> t.Optional[exp.Expression]:
            this = self._parse_bitwise()

            if self._match_text_seq("DELETE"):
                return self.expression(exp.MergeTreeTTLAction, this=this, delete=True)
            if self._match_text_seq("RECOMPRESS"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise()
                )
            if self._match_text_seq("TO", "DISK"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string()
                )
            if self._match_text_seq("TO", "VOLUME"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string()
                )

            return this

        expressions = self._parse_csv(_parse_ttl_action)
        where = self._parse_where()
        group = self._parse_group()

        aggregates = None
        if group and self._match(TokenType.SET):
            aggregates = self._parse_csv(self._parse_set_item)

        return self.expression(
            exp.MergeTreeTTL,
            expressions=expressions,
            where=where,
            group=group,
            aggregates=aggregates,
        )

    def _parse_statement(self) -> t.Optional[exp.Expression]:
        """Top-level entry point: dispatches on the current token to a statement parser."""
        if self._curr is None:
            return None

        if self._match_set(self.STATEMENT_PARSERS):
            return self.STATEMENT_PARSERS[self._prev.token_type](self)

        if self._match_set(Tokenizer.COMMANDS):
            return self._parse_command()

        # Not a recognized statement: parse as a bare expression or a SELECT.
        expression = self._parse_expression()
        expression = self._parse_set_operations(expression) if expression else self._parse_select()
        return self._parse_query_modifiers(expression)
    def _parse_drop(self) -> exp.Drop | exp.Command:
        """Parses DROP [TEMPORARY] [MATERIALIZED] <kind> [IF EXISTS] <table> ..."""
        start = self._prev
        temporary = self._match(TokenType.TEMPORARY)
        materialized = self._match_text_seq("MATERIALIZED")

        kind = self._match_set(self.CREATABLES) and self._prev.text
        if not kind:
            # Unknown object kind: keep the statement as an opaque command.
            return self._parse_as_command(start)

        return self.expression(
            exp.Drop,
            exists=self._parse_exists(),
            this=self._parse_table(schema=True),
            kind=kind,
            temporary=temporary,
            materialized=materialized,
            cascade=self._match_text_seq("CASCADE"),
            constraints=self._match_text_seq("CONSTRAINTS"),
            purge=self._match_text_seq("PURGE"),
        )

    def _parse_exists(self, not_: bool = False) -> t.Optional[bool]:
        # Matches IF [NOT] EXISTS; returns truthy only if the full sequence matched.
        return (
            self._match(TokenType.IF)
            and (not not_ or self._match(TokenType.NOT))
            and self._match(TokenType.EXISTS)
        )

    def _parse_create(self) -> exp.Create | exp.Command:
        """Parses CREATE [OR REPLACE] [UNIQUE] <kind> ... statements."""
        # Note: this can't be None because we've matched a statement parser
        start = self._prev
        replace = start.text.upper() == "REPLACE" or self._match_pair(
            TokenType.OR, TokenType.REPLACE
        )
        unique = self._match(TokenType.UNIQUE)

        if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False):
            # CREATE TABLE FUNCTION: skip TABLE so FUNCTION is the creatable.
            self._advance()

        properties = None
        create_token = self._match_set(self.CREATABLES) and self._prev

        if not create_token:
            # exp.Properties.Location.POST_CREATE
            properties = self._parse_properties()
            create_token = self._match_set(self.CREATABLES) and self._prev

        if not properties or not create_token:
            return self._parse_as_command(start)

        exists = self._parse_exists(not_=True)
        this = None
        expression = None
        indexes = None
        no_schema_binding = None
        begin = None
        clone = None

        def extend_props(temp_props: t.Optional[exp.Properties]) -> None:
            # Accumulates properties parsed at different clause positions.
            nonlocal properties
            if properties and temp_props:
                properties.expressions.extend(temp_props.expressions)
            elif temp_props:
                properties = temp_props

        if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=create_token.token_type)

            # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature)
            extend_props(self._parse_properties())

            self._match(TokenType.ALIAS)
            begin = self._match(TokenType.BEGIN)
            return_ = self._match_text_seq("RETURN")
            expression = self._parse_statement()

            if return_:
                expression = self.expression(exp.Return, this=expression)
        elif create_token.token_type == TokenType.INDEX:
            this = self._parse_index(index=self._parse_id_var())
        elif create_token.token_type in self.DB_CREATABLES:
            table_parts = self._parse_table_parts(schema=True)

            # exp.Properties.Location.POST_NAME
            self._match(TokenType.COMMA)
            extend_props(self._parse_properties(before=True))

            this = self._parse_schema(this=table_parts)

            # exp.Properties.Location.POST_SCHEMA and POST_WITH
            extend_props(self._parse_properties())

            self._match(TokenType.ALIAS)
            if not self._match_set(self.DDL_SELECT_TOKENS, advance=False):
                # exp.Properties.Location.POST_ALIAS
                extend_props(self._parse_properties())

            expression = self._parse_ddl_select()

            if create_token.token_type == TokenType.TABLE:
                indexes = []
                while True:
                    index = self._parse_index()

                    # exp.Properties.Location.POST_EXPRESSION and POST_INDEX
                    extend_props(self._parse_properties())

                    if not index:
                        break
                    else:
                        self._match(TokenType.COMMA)
                        indexes.append(index)
            elif create_token.token_type == TokenType.VIEW:
                if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"):
                    no_schema_binding = True

        if self._match_text_seq("CLONE"):
            clone = self._parse_table(schema=True)
            when = self._match_texts({"AT", "BEFORE"}) and self._prev.text.upper()
            clone_kind = (
                self._match(TokenType.L_PAREN)
                and self._match_texts(self.CLONE_KINDS)
                and self._prev.text.upper()
            )
            clone_expression = self._match(TokenType.FARROW) and self._parse_bitwise()
            self._match(TokenType.R_PAREN)
            clone = self.expression(
                exp.Clone, this=clone, when=when, kind=clone_kind, expression=clone_expression
            )

        return self.expression(
            exp.Create,
            this=this,
            kind=create_token.text,
            replace=replace,
            unique=unique,
            expression=expression,
            exists=exists,
            properties=properties,
            indexes=indexes,
            no_schema_binding=no_schema_binding,
            begin=begin,
            clone=clone,
        )

    def _parse_property_before(self) -> t.Optional[exp.Expression]:
        # only used for teradata currently
        self._match(TokenType.COMMA)

        # Optional modifier keywords that may precede the property name.
        kwargs = {
            "no": self._match_text_seq("NO"),
            "dual": self._match_text_seq("DUAL"),
            "before": self._match_text_seq("BEFORE"),
            "default": self._match_text_seq("DEFAULT"),
            "local": (self._match_text_seq("LOCAL") and "LOCAL")
            or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"),
            "after": self._match_text_seq("AFTER"),
            "minimum": self._match_texts(("MIN", "MINIMUM")),
            "maximum": self._match_texts(("MAX", "MAXIMUM")),
        }

        if self._match_texts(self.PROPERTY_PARSERS):
            parser = self.PROPERTY_PARSERS[self._prev.text.upper()]
            try:
                # Only forward modifiers that actually matched.
                return parser(self, **{k: v for k, v in kwargs.items() if v})
            except TypeError:
                # The parser doesn't accept these modifiers -> invalid property.
                self.raise_error(f"Cannot parse property '{self._prev.text}'")

        return None

    def _parse_property(self) -> t.Optional[exp.Expression]:
        """Parses a single DDL property, returning None if nothing matches."""
        if self._match_texts(self.PROPERTY_PARSERS):
            return self.PROPERTY_PARSERS[self._prev.text.upper()](self)

        if self._match_pair(TokenType.DEFAULT, TokenType.CHARACTER_SET):
            return self._parse_character_set(default=True)

        if self._match_text_seq("COMPOUND", "SORTKEY"):
            return self._parse_sortkey(compound=True)

        if self._match_text_seq("SQL", "SECURITY"):
            return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER"))

        # Generic `key = value` property assignment.
        assignment = self._match_pair(
            TokenType.VAR, TokenType.EQ, advance=False
        ) or self._match_pair(TokenType.STRING, TokenType.EQ, advance=False)

        if assignment:
            key = self._parse_var_or_string()
            self._match(TokenType.EQ)
            return self.expression(exp.Property, this=key, value=self._parse_column())

        return None

    def _parse_stored(self) -> exp.FileFormatProperty:
        """Parses STORED AS [INPUTFORMAT ... OUTPUTFORMAT ...] <format>."""
        self._match(TokenType.ALIAS)

        input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None
        output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None

        return self.expression(
            exp.FileFormatProperty,
            this=self.expression(
                exp.InputOutputFormat, input_format=input_format, output_format=output_format
            )
            if input_format or output_format
            else self._parse_var_or_string() or self._parse_number() or self._parse_id_var(),
        )

    def _parse_property_assignment(self, exp_class: t.Type[E]) -> E:
        # Parses `<prop> [=|AS] <value>` into the given property class.
        self._match(TokenType.EQ)
        self._match(TokenType.ALIAS)
        return self.expression(exp_class, this=self._parse_field())

    def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]:
        """Collects consecutive properties into an exp.Properties node, or None."""
        properties = []
        while True:
            if before:
                prop = self._parse_property_before()
            else:
                prop = self._parse_property()

            if not prop:
                break
            for p in ensure_list(prop):
                properties.append(p)

        if properties:
            return self.expression(exp.Properties, expressions=properties)

        return None

    def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty:
        return self.expression(
            exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION")
        )

    def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty:
        # VOLATILE right after CREATE/REPLACE/UNIQUE is a table property;
        # otherwise it's a function stability marker.
        if self._index >= 2:
            pre_volatile_token = self._tokens[self._index - 2]
        else:
            pre_volatile_token = None

        if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS:
            return exp.VolatileProperty()

        return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE"))

    def _parse_with_property(
        self,
    ) -> t.Optional[exp.Expression] | t.List[t.Optional[exp.Expression]]:
        """Parses the various WITH ... property forms."""
        self._match(TokenType.WITH)
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_property)

        if self._match_text_seq("JOURNAL"):
            return self._parse_withjournaltable()

        if self._match_text_seq("DATA"):
            return self._parse_withdata(no=False)
        elif self._match_text_seq("NO", "DATA"):
            return self._parse_withdata(no=True)

        if not self._next:
            return None

        return self._parse_withisolatedloading()

    # https://dev.mysql.com/doc/refman/8.0/en/create-view.html
    def _parse_definer(self) -> t.Optional[exp.DefinerProperty]:
        self._match(TokenType.EQ)

        user = self._parse_id_var()
        self._match(TokenType.PARAMETER)
        host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text)

        if not user or not host:
            return None

        return exp.DefinerProperty(this=f"{user}@{host}")

    def _parse_withjournaltable(self) -> exp.WithJournalTableProperty:
        # Parses WITH JOURNAL [TABLE] [=] <table>.
        self._match(TokenType.TABLE)
        self._match(TokenType.EQ)
        return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts())

    def _parse_log(self, no: bool = False) -> exp.LogProperty:
        return self.expression(exp.LogProperty, no=no)

    def _parse_journal(self, **kwargs) -> exp.JournalProperty:
        return self.expression(exp.JournalProperty, **kwargs)

    def _parse_checksum(self) -> exp.ChecksumProperty:
        """Parses CHECKSUM [=] ON|OFF [DEFAULT]."""
        self._match(TokenType.EQ)

        on = None
        if self._match(TokenType.ON):
            on = True
        elif self._match_text_seq("OFF"):
            on = False

        return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT))

    def _parse_cluster(self) -> t.Optional[exp.Cluster]:
        if not self._match_text_seq("BY"):
            # Not CLUSTER BY: undo consuming CLUSTER and bail out.
            self._retreat(self._index - 1)
            return None

        return self.expression(exp.Cluster, expressions=self._parse_csv(self._parse_ordered))

    def _parse_freespace(self) -> exp.FreespaceProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT)
        )

    def _parse_mergeblockratio(
        self, no: bool = False, default: bool = False
    ) -> exp.MergeBlockRatioProperty:
        if self._match(TokenType.EQ):
            return self.expression(
                exp.MergeBlockRatioProperty,
                this=self._parse_number(),
                percent=self._match(TokenType.PERCENT),
            )

        return self.expression(exp.MergeBlockRatioProperty, no=no, default=default)

    def _parse_datablocksize(
        self,
        default: t.Optional[bool] = None,
        minimum: t.Optional[bool] = None,
        maximum: t.Optional[bool] = None,
    ) -> exp.DataBlocksizeProperty:
        """Parses DATABLOCKSIZE [=] <size> [BYTES|KBYTES|KILOBYTES]."""
        self._match(TokenType.EQ)
        size = self._parse_number()

        units = None
        if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")):
            units = self._prev.text

        return self.expression(
            exp.DataBlocksizeProperty,
            size=size,
            units=units,
            default=default,
            minimum=minimum,
            maximum=maximum,
        )

    # NOTE(review): this definition continues past the visible chunk; only the
    # visible head is reproduced here.
    def _parse_blockcompression(self) -> exp.BlockCompressionProperty:
        self._match(TokenType.EQ)
        always =
self._match_text_seq("ALWAYS") 1471 manual = self._match_text_seq("MANUAL") 1472 never = self._match_text_seq("NEVER") 1473 default = self._match_text_seq("DEFAULT") 1474 1475 autotemp = None 1476 if self._match_text_seq("AUTOTEMP"): 1477 autotemp = self._parse_schema() 1478 1479 return self.expression( 1480 exp.BlockCompressionProperty, 1481 always=always, 1482 manual=manual, 1483 never=never, 1484 default=default, 1485 autotemp=autotemp, 1486 ) 1487 1488 def _parse_withisolatedloading(self) -> exp.IsolatedLoadingProperty: 1489 no = self._match_text_seq("NO") 1490 concurrent = self._match_text_seq("CONCURRENT") 1491 self._match_text_seq("ISOLATED", "LOADING") 1492 for_all = self._match_text_seq("FOR", "ALL") 1493 for_insert = self._match_text_seq("FOR", "INSERT") 1494 for_none = self._match_text_seq("FOR", "NONE") 1495 return self.expression( 1496 exp.IsolatedLoadingProperty, 1497 no=no, 1498 concurrent=concurrent, 1499 for_all=for_all, 1500 for_insert=for_insert, 1501 for_none=for_none, 1502 ) 1503 1504 def _parse_locking(self) -> exp.LockingProperty: 1505 if self._match(TokenType.TABLE): 1506 kind = "TABLE" 1507 elif self._match(TokenType.VIEW): 1508 kind = "VIEW" 1509 elif self._match(TokenType.ROW): 1510 kind = "ROW" 1511 elif self._match_text_seq("DATABASE"): 1512 kind = "DATABASE" 1513 else: 1514 kind = None 1515 1516 if kind in ("DATABASE", "TABLE", "VIEW"): 1517 this = self._parse_table_parts() 1518 else: 1519 this = None 1520 1521 if self._match(TokenType.FOR): 1522 for_or_in = "FOR" 1523 elif self._match(TokenType.IN): 1524 for_or_in = "IN" 1525 else: 1526 for_or_in = None 1527 1528 if self._match_text_seq("ACCESS"): 1529 lock_type = "ACCESS" 1530 elif self._match_texts(("EXCL", "EXCLUSIVE")): 1531 lock_type = "EXCLUSIVE" 1532 elif self._match_text_seq("SHARE"): 1533 lock_type = "SHARE" 1534 elif self._match_text_seq("READ"): 1535 lock_type = "READ" 1536 elif self._match_text_seq("WRITE"): 1537 lock_type = "WRITE" 1538 elif 
self._match_text_seq("CHECKSUM"): 1539 lock_type = "CHECKSUM" 1540 else: 1541 lock_type = None 1542 1543 override = self._match_text_seq("OVERRIDE") 1544 1545 return self.expression( 1546 exp.LockingProperty, 1547 this=this, 1548 kind=kind, 1549 for_or_in=for_or_in, 1550 lock_type=lock_type, 1551 override=override, 1552 ) 1553 1554 def _parse_partition_by(self) -> t.List[t.Optional[exp.Expression]]: 1555 if self._match(TokenType.PARTITION_BY): 1556 return self._parse_csv(self._parse_conjunction) 1557 return [] 1558 1559 def _parse_partitioned_by(self) -> exp.PartitionedByProperty: 1560 self._match(TokenType.EQ) 1561 return self.expression( 1562 exp.PartitionedByProperty, 1563 this=self._parse_schema() or self._parse_bracket(self._parse_field()), 1564 ) 1565 1566 def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty: 1567 if self._match_text_seq("AND", "STATISTICS"): 1568 statistics = True 1569 elif self._match_text_seq("AND", "NO", "STATISTICS"): 1570 statistics = False 1571 else: 1572 statistics = None 1573 1574 return self.expression(exp.WithDataProperty, no=no, statistics=statistics) 1575 1576 def _parse_no_property(self) -> t.Optional[exp.NoPrimaryIndexProperty]: 1577 if self._match_text_seq("PRIMARY", "INDEX"): 1578 return exp.NoPrimaryIndexProperty() 1579 return None 1580 1581 def _parse_on_property(self) -> t.Optional[exp.Expression]: 1582 if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"): 1583 return exp.OnCommitProperty() 1584 elif self._match_text_seq("COMMIT", "DELETE", "ROWS"): 1585 return exp.OnCommitProperty(delete=True) 1586 return None 1587 1588 def _parse_distkey(self) -> exp.DistKeyProperty: 1589 return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var)) 1590 1591 def _parse_create_like(self) -> t.Optional[exp.LikeProperty]: 1592 table = self._parse_table(schema=True) 1593 1594 options = [] 1595 while self._match_texts(("INCLUDING", "EXCLUDING")): 1596 this = self._prev.text.upper() 1597 1598 id_var 
= self._parse_id_var() 1599 if not id_var: 1600 return None 1601 1602 options.append( 1603 self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper())) 1604 ) 1605 1606 return self.expression(exp.LikeProperty, this=table, expressions=options) 1607 1608 def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty: 1609 return self.expression( 1610 exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound 1611 ) 1612 1613 def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty: 1614 self._match(TokenType.EQ) 1615 return self.expression( 1616 exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default 1617 ) 1618 1619 def _parse_returns(self) -> exp.ReturnsProperty: 1620 value: t.Optional[exp.Expression] 1621 is_table = self._match(TokenType.TABLE) 1622 1623 if is_table: 1624 if self._match(TokenType.LT): 1625 value = self.expression( 1626 exp.Schema, 1627 this="TABLE", 1628 expressions=self._parse_csv(self._parse_struct_types), 1629 ) 1630 if not self._match(TokenType.GT): 1631 self.raise_error("Expecting >") 1632 else: 1633 value = self._parse_schema(exp.var("TABLE")) 1634 else: 1635 value = self._parse_types() 1636 1637 return self.expression(exp.ReturnsProperty, this=value, is_table=is_table) 1638 1639 def _parse_describe(self) -> exp.Describe: 1640 kind = self._match_set(self.CREATABLES) and self._prev.text 1641 this = self._parse_table() 1642 return self.expression(exp.Describe, this=this, kind=kind) 1643 1644 def _parse_insert(self) -> exp.Insert: 1645 overwrite = self._match(TokenType.OVERWRITE) 1646 local = self._match_text_seq("LOCAL") 1647 alternative = None 1648 1649 if self._match_text_seq("DIRECTORY"): 1650 this: t.Optional[exp.Expression] = self.expression( 1651 exp.Directory, 1652 this=self._parse_var_or_string(), 1653 local=local, 1654 row_format=self._parse_row_format(match_row=True), 1655 ) 1656 else: 1657 if self._match(TokenType.OR): 1658 alternative = 
self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text 1659 1660 self._match(TokenType.INTO) 1661 self._match(TokenType.TABLE) 1662 this = self._parse_table(schema=True) 1663 1664 return self.expression( 1665 exp.Insert, 1666 this=this, 1667 exists=self._parse_exists(), 1668 partition=self._parse_partition(), 1669 expression=self._parse_ddl_select(), 1670 conflict=self._parse_on_conflict(), 1671 returning=self._parse_returning(), 1672 overwrite=overwrite, 1673 alternative=alternative, 1674 ) 1675 1676 def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]: 1677 conflict = self._match_text_seq("ON", "CONFLICT") 1678 duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY") 1679 1680 if not conflict and not duplicate: 1681 return None 1682 1683 nothing = None 1684 expressions = None 1685 key = None 1686 constraint = None 1687 1688 if conflict: 1689 if self._match_text_seq("ON", "CONSTRAINT"): 1690 constraint = self._parse_id_var() 1691 else: 1692 key = self._parse_csv(self._parse_value) 1693 1694 self._match_text_seq("DO") 1695 if self._match_text_seq("NOTHING"): 1696 nothing = True 1697 else: 1698 self._match(TokenType.UPDATE) 1699 expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality) 1700 1701 return self.expression( 1702 exp.OnConflict, 1703 duplicate=duplicate, 1704 expressions=expressions, 1705 nothing=nothing, 1706 key=key, 1707 constraint=constraint, 1708 ) 1709 1710 def _parse_returning(self) -> t.Optional[exp.Returning]: 1711 if not self._match(TokenType.RETURNING): 1712 return None 1713 1714 return self.expression(exp.Returning, expressions=self._parse_csv(self._parse_column)) 1715 1716 def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 1717 if not self._match(TokenType.FORMAT): 1718 return None 1719 return self._parse_row_format() 1720 1721 def _parse_row_format( 1722 self, match_row: bool = False 1723 ) -> t.Optional[exp.RowFormatSerdeProperty | 
exp.RowFormatDelimitedProperty]: 1724 if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT): 1725 return None 1726 1727 if self._match_text_seq("SERDE"): 1728 return self.expression(exp.RowFormatSerdeProperty, this=self._parse_string()) 1729 1730 self._match_text_seq("DELIMITED") 1731 1732 kwargs = {} 1733 1734 if self._match_text_seq("FIELDS", "TERMINATED", "BY"): 1735 kwargs["fields"] = self._parse_string() 1736 if self._match_text_seq("ESCAPED", "BY"): 1737 kwargs["escaped"] = self._parse_string() 1738 if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"): 1739 kwargs["collection_items"] = self._parse_string() 1740 if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"): 1741 kwargs["map_keys"] = self._parse_string() 1742 if self._match_text_seq("LINES", "TERMINATED", "BY"): 1743 kwargs["lines"] = self._parse_string() 1744 if self._match_text_seq("NULL", "DEFINED", "AS"): 1745 kwargs["null"] = self._parse_string() 1746 1747 return self.expression(exp.RowFormatDelimitedProperty, **kwargs) # type: ignore 1748 1749 def _parse_load(self) -> exp.LoadData | exp.Command: 1750 if self._match_text_seq("DATA"): 1751 local = self._match_text_seq("LOCAL") 1752 self._match_text_seq("INPATH") 1753 inpath = self._parse_string() 1754 overwrite = self._match(TokenType.OVERWRITE) 1755 self._match_pair(TokenType.INTO, TokenType.TABLE) 1756 1757 return self.expression( 1758 exp.LoadData, 1759 this=self._parse_table(schema=True), 1760 local=local, 1761 overwrite=overwrite, 1762 inpath=inpath, 1763 partition=self._parse_partition(), 1764 input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(), 1765 serde=self._match_text_seq("SERDE") and self._parse_string(), 1766 ) 1767 return self._parse_as_command(self._prev) 1768 1769 def _parse_delete(self) -> exp.Delete: 1770 self._match(TokenType.FROM) 1771 1772 return self.expression( 1773 exp.Delete, 1774 this=self._parse_table(), 1775 using=self._parse_csv(lambda: 
self._match(TokenType.USING) and self._parse_table()), 1776 where=self._parse_where(), 1777 returning=self._parse_returning(), 1778 ) 1779 1780 def _parse_update(self) -> exp.Update: 1781 return self.expression( 1782 exp.Update, 1783 **{ # type: ignore 1784 "this": self._parse_table(alias_tokens=self.UPDATE_ALIAS_TOKENS), 1785 "expressions": self._match(TokenType.SET) and self._parse_csv(self._parse_equality), 1786 "from": self._parse_from(modifiers=True), 1787 "where": self._parse_where(), 1788 "returning": self._parse_returning(), 1789 }, 1790 ) 1791 1792 def _parse_uncache(self) -> exp.Uncache: 1793 if not self._match(TokenType.TABLE): 1794 self.raise_error("Expecting TABLE after UNCACHE") 1795 1796 return self.expression( 1797 exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True) 1798 ) 1799 1800 def _parse_cache(self) -> exp.Cache: 1801 lazy = self._match_text_seq("LAZY") 1802 self._match(TokenType.TABLE) 1803 table = self._parse_table(schema=True) 1804 1805 options = [] 1806 if self._match_text_seq("OPTIONS"): 1807 self._match_l_paren() 1808 k = self._parse_string() 1809 self._match(TokenType.EQ) 1810 v = self._parse_string() 1811 options = [k, v] 1812 self._match_r_paren() 1813 1814 self._match(TokenType.ALIAS) 1815 return self.expression( 1816 exp.Cache, 1817 this=table, 1818 lazy=lazy, 1819 options=options, 1820 expression=self._parse_select(nested=True), 1821 ) 1822 1823 def _parse_partition(self) -> t.Optional[exp.Partition]: 1824 if not self._match(TokenType.PARTITION): 1825 return None 1826 1827 return self.expression( 1828 exp.Partition, expressions=self._parse_wrapped_csv(self._parse_conjunction) 1829 ) 1830 1831 def _parse_value(self) -> exp.Tuple: 1832 if self._match(TokenType.L_PAREN): 1833 expressions = self._parse_csv(self._parse_conjunction) 1834 self._match_r_paren() 1835 return self.expression(exp.Tuple, expressions=expressions) 1836 1837 # In presto we can have VALUES 1, 2 which results in 1 column & 2 rows. 
1838 # Source: https://prestodb.io/docs/current/sql/values.html 1839 return self.expression(exp.Tuple, expressions=[self._parse_conjunction()]) 1840 1841 def _parse_select( 1842 self, nested: bool = False, table: bool = False, parse_subquery_alias: bool = True 1843 ) -> t.Optional[exp.Expression]: 1844 cte = self._parse_with() 1845 if cte: 1846 this = self._parse_statement() 1847 1848 if not this: 1849 self.raise_error("Failed to parse any statement following CTE") 1850 return cte 1851 1852 if "with" in this.arg_types: 1853 this.set("with", cte) 1854 else: 1855 self.raise_error(f"{this.key} does not support CTE") 1856 this = cte 1857 elif self._match(TokenType.SELECT): 1858 comments = self._prev_comments 1859 1860 hint = self._parse_hint() 1861 all_ = self._match(TokenType.ALL) 1862 distinct = self._match(TokenType.DISTINCT) 1863 1864 kind = ( 1865 self._match(TokenType.ALIAS) 1866 and self._match_texts(("STRUCT", "VALUE")) 1867 and self._prev.text 1868 ) 1869 1870 if distinct: 1871 distinct = self.expression( 1872 exp.Distinct, 1873 on=self._parse_value() if self._match(TokenType.ON) else None, 1874 ) 1875 1876 if all_ and distinct: 1877 self.raise_error("Cannot specify both ALL and DISTINCT after SELECT") 1878 1879 limit = self._parse_limit(top=True) 1880 expressions = self._parse_csv(self._parse_expression) 1881 1882 this = self.expression( 1883 exp.Select, 1884 kind=kind, 1885 hint=hint, 1886 distinct=distinct, 1887 expressions=expressions, 1888 limit=limit, 1889 ) 1890 this.comments = comments 1891 1892 into = self._parse_into() 1893 if into: 1894 this.set("into", into) 1895 1896 from_ = self._parse_from() 1897 if from_: 1898 this.set("from", from_) 1899 1900 this = self._parse_query_modifiers(this) 1901 elif (table or nested) and self._match(TokenType.L_PAREN): 1902 if self._match(TokenType.PIVOT): 1903 this = self._parse_simplified_pivot() 1904 elif self._match(TokenType.FROM): 1905 this = exp.select("*").from_( 1906 t.cast(exp.From, 
self._parse_from(skip_from_token=True)) 1907 ) 1908 else: 1909 this = self._parse_table() if table else self._parse_select(nested=True) 1910 this = self._parse_set_operations(self._parse_query_modifiers(this)) 1911 1912 self._match_r_paren() 1913 1914 # early return so that subquery unions aren't parsed again 1915 # SELECT * FROM (SELECT 1) UNION ALL SELECT 1 1916 # Union ALL should be a property of the top select node, not the subquery 1917 return self._parse_subquery(this, parse_alias=parse_subquery_alias) 1918 elif self._match(TokenType.VALUES): 1919 this = self.expression( 1920 exp.Values, 1921 expressions=self._parse_csv(self._parse_value), 1922 alias=self._parse_table_alias(), 1923 ) 1924 else: 1925 this = None 1926 1927 return self._parse_set_operations(this) 1928 1929 def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]: 1930 if not skip_with_token and not self._match(TokenType.WITH): 1931 return None 1932 1933 comments = self._prev_comments 1934 recursive = self._match(TokenType.RECURSIVE) 1935 1936 expressions = [] 1937 while True: 1938 expressions.append(self._parse_cte()) 1939 1940 if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH): 1941 break 1942 else: 1943 self._match(TokenType.WITH) 1944 1945 return self.expression( 1946 exp.With, comments=comments, expressions=expressions, recursive=recursive 1947 ) 1948 1949 def _parse_cte(self) -> exp.CTE: 1950 alias = self._parse_table_alias() 1951 if not alias or not alias.this: 1952 self.raise_error("Expected CTE to have alias") 1953 1954 self._match(TokenType.ALIAS) 1955 return self.expression( 1956 exp.CTE, this=self._parse_wrapped(self._parse_statement), alias=alias 1957 ) 1958 1959 def _parse_table_alias( 1960 self, alias_tokens: t.Optional[t.Collection[TokenType]] = None 1961 ) -> t.Optional[exp.TableAlias]: 1962 any_token = self._match(TokenType.ALIAS) 1963 alias = ( 1964 self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 
1965 or self._parse_string_as_identifier() 1966 ) 1967 1968 index = self._index 1969 if self._match(TokenType.L_PAREN): 1970 columns = self._parse_csv(self._parse_function_parameter) 1971 self._match_r_paren() if columns else self._retreat(index) 1972 else: 1973 columns = None 1974 1975 if not alias and not columns: 1976 return None 1977 1978 return self.expression(exp.TableAlias, this=alias, columns=columns) 1979 1980 def _parse_subquery( 1981 self, this: t.Optional[exp.Expression], parse_alias: bool = True 1982 ) -> t.Optional[exp.Subquery]: 1983 if not this: 1984 return None 1985 1986 return self.expression( 1987 exp.Subquery, 1988 this=this, 1989 pivots=self._parse_pivots(), 1990 alias=self._parse_table_alias() if parse_alias else None, 1991 ) 1992 1993 def _parse_query_modifiers( 1994 self, this: t.Optional[exp.Expression] 1995 ) -> t.Optional[exp.Expression]: 1996 if isinstance(this, self.MODIFIABLES): 1997 for key, parser in self.QUERY_MODIFIER_PARSERS.items(): 1998 expression = parser(self) 1999 2000 if expression: 2001 if key == "limit": 2002 offset = expression.args.pop("offset", None) 2003 if offset: 2004 this.set("offset", exp.Offset(expression=offset)) 2005 this.set(key, expression) 2006 return this 2007 2008 def _parse_hint(self) -> t.Optional[exp.Hint]: 2009 if self._match(TokenType.HINT): 2010 hints = self._parse_csv(self._parse_function) 2011 2012 if not self._match_pair(TokenType.STAR, TokenType.SLASH): 2013 self.raise_error("Expected */ after HINT") 2014 2015 return self.expression(exp.Hint, expressions=hints) 2016 2017 return None 2018 2019 def _parse_into(self) -> t.Optional[exp.Into]: 2020 if not self._match(TokenType.INTO): 2021 return None 2022 2023 temp = self._match(TokenType.TEMPORARY) 2024 unlogged = self._match_text_seq("UNLOGGED") 2025 self._match(TokenType.TABLE) 2026 2027 return self.expression( 2028 exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged 2029 ) 2030 2031 def _parse_from( 2032 self, 
modifiers: bool = False, skip_from_token: bool = False 2033 ) -> t.Optional[exp.From]: 2034 if not skip_from_token and not self._match(TokenType.FROM): 2035 return None 2036 2037 comments = self._prev_comments 2038 this = self._parse_table() 2039 2040 return self.expression( 2041 exp.From, 2042 comments=comments, 2043 this=self._parse_query_modifiers(this) if modifiers else this, 2044 ) 2045 2046 def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]: 2047 if not self._match(TokenType.MATCH_RECOGNIZE): 2048 return None 2049 2050 self._match_l_paren() 2051 2052 partition = self._parse_partition_by() 2053 order = self._parse_order() 2054 measures = ( 2055 self._parse_csv(self._parse_expression) if self._match_text_seq("MEASURES") else None 2056 ) 2057 2058 if self._match_text_seq("ONE", "ROW", "PER", "MATCH"): 2059 rows = exp.var("ONE ROW PER MATCH") 2060 elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"): 2061 text = "ALL ROWS PER MATCH" 2062 if self._match_text_seq("SHOW", "EMPTY", "MATCHES"): 2063 text += f" SHOW EMPTY MATCHES" 2064 elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"): 2065 text += f" OMIT EMPTY MATCHES" 2066 elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"): 2067 text += f" WITH UNMATCHED ROWS" 2068 rows = exp.var(text) 2069 else: 2070 rows = None 2071 2072 if self._match_text_seq("AFTER", "MATCH", "SKIP"): 2073 text = "AFTER MATCH SKIP" 2074 if self._match_text_seq("PAST", "LAST", "ROW"): 2075 text += f" PAST LAST ROW" 2076 elif self._match_text_seq("TO", "NEXT", "ROW"): 2077 text += f" TO NEXT ROW" 2078 elif self._match_text_seq("TO", "FIRST"): 2079 text += f" TO FIRST {self._advance_any().text}" # type: ignore 2080 elif self._match_text_seq("TO", "LAST"): 2081 text += f" TO LAST {self._advance_any().text}" # type: ignore 2082 after = exp.var(text) 2083 else: 2084 after = None 2085 2086 if self._match_text_seq("PATTERN"): 2087 self._match_l_paren() 2088 2089 if not self._curr: 2090 self.raise_error("Expecting )", 
self._curr) 2091 2092 paren = 1 2093 start = self._curr 2094 2095 while self._curr and paren > 0: 2096 if self._curr.token_type == TokenType.L_PAREN: 2097 paren += 1 2098 if self._curr.token_type == TokenType.R_PAREN: 2099 paren -= 1 2100 2101 end = self._prev 2102 self._advance() 2103 2104 if paren > 0: 2105 self.raise_error("Expecting )", self._curr) 2106 2107 pattern = exp.var(self._find_sql(start, end)) 2108 else: 2109 pattern = None 2110 2111 define = ( 2112 self._parse_csv( 2113 lambda: self.expression( 2114 exp.Alias, 2115 alias=self._parse_id_var(any_token=True), 2116 this=self._match(TokenType.ALIAS) and self._parse_conjunction(), 2117 ) 2118 ) 2119 if self._match_text_seq("DEFINE") 2120 else None 2121 ) 2122 2123 self._match_r_paren() 2124 2125 return self.expression( 2126 exp.MatchRecognize, 2127 partition_by=partition, 2128 order=order, 2129 measures=measures, 2130 rows=rows, 2131 after=after, 2132 pattern=pattern, 2133 define=define, 2134 alias=self._parse_table_alias(), 2135 ) 2136 2137 def _parse_lateral(self) -> t.Optional[exp.Lateral]: 2138 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY) 2139 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY) 2140 2141 if outer_apply or cross_apply: 2142 this = self._parse_select(table=True) 2143 view = None 2144 outer = not cross_apply 2145 elif self._match(TokenType.LATERAL): 2146 this = self._parse_select(table=True) 2147 view = self._match(TokenType.VIEW) 2148 outer = self._match(TokenType.OUTER) 2149 else: 2150 return None 2151 2152 if not this: 2153 this = self._parse_function() or self._parse_id_var(any_token=False) 2154 while self._match(TokenType.DOT): 2155 this = exp.Dot( 2156 this=this, 2157 expression=self._parse_function() or self._parse_id_var(any_token=False), 2158 ) 2159 2160 if view: 2161 table = self._parse_id_var(any_token=False) 2162 columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else [] 2163 table_alias: t.Optional[exp.TableAlias] = 
self.expression( 2164 exp.TableAlias, this=table, columns=columns 2165 ) 2166 elif isinstance(this, exp.Subquery) and this.alias: 2167 # Ensures parity between the Subquery's and the Lateral's "alias" args 2168 table_alias = this.args["alias"].copy() 2169 else: 2170 table_alias = self._parse_table_alias() 2171 2172 return self.expression(exp.Lateral, this=this, view=view, outer=outer, alias=table_alias) 2173 2174 def _parse_join_parts( 2175 self, 2176 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 2177 return ( 2178 self._match_set(self.JOIN_METHODS) and self._prev, 2179 self._match_set(self.JOIN_SIDES) and self._prev, 2180 self._match_set(self.JOIN_KINDS) and self._prev, 2181 ) 2182 2183 def _parse_join(self, skip_join_token: bool = False) -> t.Optional[exp.Join]: 2184 if self._match(TokenType.COMMA): 2185 return self.expression(exp.Join, this=self._parse_table()) 2186 2187 index = self._index 2188 method, side, kind = self._parse_join_parts() 2189 hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None 2190 join = self._match(TokenType.JOIN) 2191 2192 if not skip_join_token and not join: 2193 self._retreat(index) 2194 kind = None 2195 method = None 2196 side = None 2197 2198 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False) 2199 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False) 2200 2201 if not skip_join_token and not join and not outer_apply and not cross_apply: 2202 return None 2203 2204 if outer_apply: 2205 side = Token(TokenType.LEFT, "LEFT") 2206 2207 kwargs: t.Dict[str, t.Any] = {"this": self._parse_table()} 2208 2209 if method: 2210 kwargs["method"] = method.text 2211 if side: 2212 kwargs["side"] = side.text 2213 if kind: 2214 kwargs["kind"] = kind.text 2215 if hint: 2216 kwargs["hint"] = hint 2217 2218 if self._match(TokenType.ON): 2219 kwargs["on"] = self._parse_conjunction() 2220 elif self._match(TokenType.USING): 2221 kwargs["using"] = self._parse_wrapped_id_vars() 2222 
2223 return self.expression(exp.Join, **kwargs) 2224 2225 def _parse_index( 2226 self, 2227 index: t.Optional[exp.Expression] = None, 2228 ) -> t.Optional[exp.Index]: 2229 if index: 2230 unique = None 2231 primary = None 2232 amp = None 2233 2234 self._match(TokenType.ON) 2235 self._match(TokenType.TABLE) # hive 2236 table = self._parse_table_parts(schema=True) 2237 else: 2238 unique = self._match(TokenType.UNIQUE) 2239 primary = self._match_text_seq("PRIMARY") 2240 amp = self._match_text_seq("AMP") 2241 2242 if not self._match(TokenType.INDEX): 2243 return None 2244 2245 index = self._parse_id_var() 2246 table = None 2247 2248 using = self._parse_field() if self._match(TokenType.USING) else None 2249 2250 if self._match(TokenType.L_PAREN, advance=False): 2251 columns = self._parse_wrapped_csv(self._parse_ordered) 2252 else: 2253 columns = None 2254 2255 return self.expression( 2256 exp.Index, 2257 this=index, 2258 table=table, 2259 using=using, 2260 columns=columns, 2261 unique=unique, 2262 primary=primary, 2263 amp=amp, 2264 partition_by=self._parse_partition_by(), 2265 ) 2266 2267 def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]: 2268 return ( 2269 (not schema and self._parse_function(optional_parens=False)) 2270 or self._parse_id_var(any_token=False) 2271 or self._parse_string_as_identifier() 2272 or self._parse_placeholder() 2273 ) 2274 2275 def _parse_table_parts(self, schema: bool = False) -> exp.Table: 2276 catalog = None 2277 db = None 2278 table = self._parse_table_part(schema=schema) 2279 2280 while self._match(TokenType.DOT): 2281 if catalog: 2282 # This allows nesting the table in arbitrarily many dot expressions if needed 2283 table = self.expression( 2284 exp.Dot, this=table, expression=self._parse_table_part(schema=schema) 2285 ) 2286 else: 2287 catalog = db 2288 db = table 2289 table = self._parse_table_part(schema=schema) 2290 2291 if not table: 2292 self.raise_error(f"Expected table name but got {self._curr}") 2293 
2294 return self.expression( 2295 exp.Table, this=table, db=db, catalog=catalog, pivots=self._parse_pivots() 2296 ) 2297 2298 def _parse_table( 2299 self, schema: bool = False, alias_tokens: t.Optional[t.Collection[TokenType]] = None 2300 ) -> t.Optional[exp.Expression]: 2301 lateral = self._parse_lateral() 2302 if lateral: 2303 return lateral 2304 2305 unnest = self._parse_unnest() 2306 if unnest: 2307 return unnest 2308 2309 values = self._parse_derived_table_values() 2310 if values: 2311 return values 2312 2313 subquery = self._parse_select(table=True) 2314 if subquery: 2315 if not subquery.args.get("pivots"): 2316 subquery.set("pivots", self._parse_pivots()) 2317 return subquery 2318 2319 this: exp.Expression = self._parse_table_parts(schema=schema) 2320 2321 if schema: 2322 return self._parse_schema(this=this) 2323 2324 if self.ALIAS_POST_TABLESAMPLE: 2325 table_sample = self._parse_table_sample() 2326 2327 alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 2328 if alias: 2329 this.set("alias", alias) 2330 2331 if not this.args.get("pivots"): 2332 this.set("pivots", self._parse_pivots()) 2333 2334 if self._match_pair(TokenType.WITH, TokenType.L_PAREN): 2335 this.set( 2336 "hints", 2337 self._parse_csv(lambda: self._parse_function() or self._parse_var(any_token=True)), 2338 ) 2339 self._match_r_paren() 2340 2341 if not self.ALIAS_POST_TABLESAMPLE: 2342 table_sample = self._parse_table_sample() 2343 2344 if table_sample: 2345 table_sample.set("this", this) 2346 this = table_sample 2347 2348 return this 2349 2350 def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]: 2351 if not self._match(TokenType.UNNEST): 2352 return None 2353 2354 expressions = self._parse_wrapped_csv(self._parse_type) 2355 ordinality = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 2356 2357 alias = self._parse_table_alias() if with_alias else None 2358 2359 if alias and self.UNNEST_COLUMN_ONLY: 2360 if 
alias.args.get("columns"): 2361 self.raise_error("Unexpected extra column alias in unnest.") 2362 2363 alias.set("columns", [alias.this]) 2364 alias.set("this", None) 2365 2366 offset = None 2367 if self._match_pair(TokenType.WITH, TokenType.OFFSET): 2368 self._match(TokenType.ALIAS) 2369 offset = self._parse_id_var() or exp.to_identifier("offset") 2370 2371 return self.expression( 2372 exp.Unnest, expressions=expressions, ordinality=ordinality, alias=alias, offset=offset 2373 ) 2374 2375 def _parse_derived_table_values(self) -> t.Optional[exp.Values]: 2376 is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES) 2377 if not is_derived and not self._match(TokenType.VALUES): 2378 return None 2379 2380 expressions = self._parse_csv(self._parse_value) 2381 alias = self._parse_table_alias() 2382 2383 if is_derived: 2384 self._match_r_paren() 2385 2386 return self.expression( 2387 exp.Values, expressions=expressions, alias=alias or self._parse_table_alias() 2388 ) 2389 2390 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 2391 if not self._match(TokenType.TABLE_SAMPLE) and not ( 2392 as_modifier and self._match_text_seq("USING", "SAMPLE") 2393 ): 2394 return None 2395 2396 bucket_numerator = None 2397 bucket_denominator = None 2398 bucket_field = None 2399 percent = None 2400 rows = None 2401 size = None 2402 seed = None 2403 2404 kind = ( 2405 self._prev.text if self._prev.token_type == TokenType.TABLE_SAMPLE else "USING SAMPLE" 2406 ) 2407 method = self._parse_var(tokens=(TokenType.ROW,)) 2408 2409 self._match(TokenType.L_PAREN) 2410 2411 num = self._parse_number() 2412 2413 if self._match_text_seq("BUCKET"): 2414 bucket_numerator = self._parse_number() 2415 self._match_text_seq("OUT", "OF") 2416 bucket_denominator = bucket_denominator = self._parse_number() 2417 self._match(TokenType.ON) 2418 bucket_field = self._parse_field() 2419 elif self._match_set((TokenType.PERCENT, TokenType.MOD)): 2420 percent = num 2421 elif 
self._match(TokenType.ROWS):
            rows = num
        else:
            # Bare number with no unit keyword: treat as a sample size.
            size = num

        self._match(TokenType.R_PAREN)

        # Optional trailing (method [, seed]) or SEED/REPEATABLE(seed) clause.
        if self._match(TokenType.L_PAREN):
            method = self._parse_var()
            seed = self._match(TokenType.COMMA) and self._parse_number()
            self._match_r_paren()
        elif self._match_texts(("SEED", "REPEATABLE")):
            seed = self._parse_wrapped(self._parse_number)

        return self.expression(
            exp.TableSample,
            method=method,
            bucket_numerator=bucket_numerator,
            bucket_denominator=bucket_denominator,
            bucket_field=bucket_field,
            percent=percent,
            rows=rows,
            size=size,
            seed=seed,
            kind=kind,
        )

    def _parse_pivots(self) -> t.List[t.Optional[exp.Expression]]:
        """Collect consecutive PIVOT/UNPIVOT clauses until one fails to parse."""
        # iter(callable, sentinel) keeps calling _parse_pivot until it returns None.
        return list(iter(self._parse_pivot, None))

    # https://duckdb.org/docs/sql/statements/pivot
    def _parse_simplified_pivot(self) -> exp.Pivot:
        """Parse DuckDB's simplified PIVOT syntax: PIVOT <table> ON ... USING ... GROUP BY ..."""
        def _parse_on() -> t.Optional[exp.Expression]:
            this = self._parse_bitwise()
            return self._parse_in(this) if self._match(TokenType.IN) else this

        this = self._parse_table()
        expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on)
        using = self._match(TokenType.USING) and self._parse_csv(
            lambda: self._parse_alias(self._parse_function())
        )
        group = self._parse_group()
        return self.expression(
            exp.Pivot, this=this, expressions=expressions, using=using, group=group
        )

    def _parse_pivot(self) -> t.Optional[exp.Pivot]:
        """Parse a standard PIVOT/UNPIVOT ( ... FOR ... IN ... ) clause, or None."""
        index = self._index

        if self._match(TokenType.PIVOT):
            unpivot = False
        elif self._match(TokenType.UNPIVOT):
            unpivot = True
        else:
            return None

        expressions = []
        field = None

        if not self._match(TokenType.L_PAREN):
            # Not actually a pivot clause — rewind to before PIVOT/UNPIVOT.
            self._retreat(index)
            return None

        if unpivot:
            expressions = self._parse_csv(self._parse_column)
        else:
            expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function()))
        if not expressions:
            self.raise_error("Failed to parse PIVOT's aggregation list")

        if not self._match(TokenType.FOR):
            self.raise_error("Expecting FOR")

        value = self._parse_column()

        if not self._match(TokenType.IN):
            self.raise_error("Expecting IN")

        field = self._parse_in(value, alias=True)

        self._match_r_paren()

        pivot = self.expression(exp.Pivot, expressions=expressions, field=field, unpivot=unpivot)

        # Only attach an alias if another PIVOT/UNPIVOT doesn't follow immediately.
        if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False):
            pivot.set("alias", self._parse_table_alias())

        if not unpivot:
            # Precompute the output column names from the aggregation aliases
            # crossed with the IN-list values (dialect controls prefix order).
            names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions))

            columns: t.List[exp.Expression] = []
            for fld in pivot.args["field"].expressions:
                field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name
                for name in names:
                    if self.PREFIXED_PIVOT_COLUMNS:
                        name = f"{name}_{field_name}" if name else field_name
                    else:
                        name = f"{field_name}_{name}" if name else field_name

                    columns.append(exp.to_identifier(name))

            pivot.set("columns", columns)

        return pivot

    def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]:
        """Return the alias of each pivot aggregation (dialects may override)."""
        return [agg.alias for agg in aggregations]

    def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]:
        """Parse a WHERE clause; skip_where_token assumes WHERE was already consumed."""
        if not skip_where_token and not self._match(TokenType.WHERE):
            return None

        return self.expression(
            exp.Where, comments=self._prev_comments, this=self._parse_conjunction()
        )

    def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]:
        """Parse GROUP BY with any mix of plain expressions, GROUPING SETS, ROLLUP, CUBE, TOTALS."""
        if not skip_group_by_token and not self._match(TokenType.GROUP_BY):
            return None

        # Accumulate each grouping construct by kind across loop iterations.
        elements = defaultdict(list)

        while True:
            expressions = self._parse_csv(self._parse_conjunction)
            if expressions:
                elements["expressions"].extend(expressions)

            grouping_sets = self._parse_grouping_sets()
            if grouping_sets:
                elements["grouping_sets"].extend(grouping_sets)

            rollup = None
            cube = None
            totals = None

            # WITH ROLLUP / WITH CUBE use a bare flag; otherwise a wrapped column list.
            with_ = self._match(TokenType.WITH)
            if self._match(TokenType.ROLLUP):
                rollup = with_ or self._parse_wrapped_csv(self._parse_column)
                elements["rollup"].extend(ensure_list(rollup))

            if self._match(TokenType.CUBE):
                cube = with_ or self._parse_wrapped_csv(self._parse_column)
                elements["cube"].extend(ensure_list(cube))

            if self._match_text_seq("TOTALS"):
                totals = True
                elements["totals"] = True  # type: ignore

            # Stop once no further grouping construct was found in this pass.
            if not (grouping_sets or rollup or cube or totals):
                break

        return self.expression(exp.Group, **elements)  # type: ignore

    def _parse_grouping_sets(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]:
        """Parse GROUPING SETS ( ... ), returning None if the keyword is absent."""
        if not self._match(TokenType.GROUPING_SETS):
            return None

        return self._parse_wrapped_csv(self._parse_grouping_set)

    def _parse_grouping_set(self) -> t.Optional[exp.Expression]:
        """Parse one grouping set: either a parenthesized tuple or a single column."""
        if self._match(TokenType.L_PAREN):
            grouping_set = self._parse_csv(self._parse_column)
            self._match_r_paren()
            return self.expression(exp.Tuple, expressions=grouping_set)

        return self._parse_column()

    def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]:
        """Parse a HAVING clause; skip_having_token assumes HAVING was already consumed."""
        if not skip_having_token and not self._match(TokenType.HAVING):
            return None
        return self.expression(exp.Having, this=self._parse_conjunction())

    def _parse_qualify(self) -> t.Optional[exp.Qualify]:
        """Parse a QUALIFY clause (window-function filter), or None."""
        if not self._match(TokenType.QUALIFY):
            return None
        return self.expression(exp.Qualify, this=self._parse_conjunction())

    def _parse_order(
        self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False
    ) -> t.Optional[exp.Expression]:
        if not skip_order_token
 and not self._match(TokenType.ORDER_BY):
            return this

        return self.expression(
            exp.Order, this=this, expressions=self._parse_csv(self._parse_ordered)
        )

    def _parse_sort(self, exp_class: t.Type[E], *texts: str) -> t.Optional[E]:
        """Parse an ORDER-BY-like clause introduced by the given keyword sequence."""
        if not self._match_text_seq(*texts):
            return None
        return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered))

    def _parse_ordered(self) -> exp.Ordered:
        """Parse one ordering term: expr [ASC|DESC] [NULLS FIRST|LAST]."""
        this = self._parse_conjunction()
        self._match(TokenType.ASC)

        is_desc = self._match(TokenType.DESC)
        is_nulls_first = self._match_text_seq("NULLS", "FIRST")
        is_nulls_last = self._match_text_seq("NULLS", "LAST")
        desc = is_desc or False
        asc = not desc
        nulls_first = is_nulls_first or False
        explicitly_null_ordered = is_nulls_first or is_nulls_last

        # If the query didn't spell out null ordering, apply the dialect's
        # default (NULL_ORDERING) so transpilation stays semantics-preserving.
        if (
            not explicitly_null_ordered
            and (
                (asc and self.NULL_ORDERING == "nulls_are_small")
                or (desc and self.NULL_ORDERING != "nulls_are_small")
            )
            and self.NULL_ORDERING != "nulls_are_last"
        ):
            nulls_first = True

        return self.expression(exp.Ordered, this=this, desc=desc, nulls_first=nulls_first)

    def _parse_limit(
        self, this: t.Optional[exp.Expression] = None, top: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse LIMIT (or TOP when top=True), including LIMIT offset, count; falls through to FETCH."""
        if self._match(TokenType.TOP if top else TokenType.LIMIT):
            limit_paren = self._match(TokenType.L_PAREN)
            expression = self._parse_number() if top else self._parse_term()

            # MySQL-style "LIMIT offset, count": the first term was the offset.
            if self._match(TokenType.COMMA):
                offset = expression
                expression = self._parse_term()
            else:
                offset = None

            limit_exp = self.expression(exp.Limit, this=this, expression=expression, offset=offset)

            if limit_paren:
                self._match_r_paren()

            return limit_exp

        if self._match(TokenType.FETCH):
            direction = self._match_set((TokenType.FIRST, TokenType.NEXT))
            direction = self._prev.text if direction else
 "FIRST"

            count = self._parse_number()
            percent = self._match(TokenType.PERCENT)

            self._match_set((TokenType.ROW, TokenType.ROWS))

            only = self._match_text_seq("ONLY")
            with_ties = self._match_text_seq("WITH", "TIES")

            # ONLY and WITH TIES are mutually exclusive in the FETCH clause.
            if only and with_ties:
                self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause")

            return self.expression(
                exp.Fetch,
                direction=direction,
                count=count,
                percent=percent,
                with_ties=with_ties,
            )

        return this

    def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        """Parse an OFFSET clause and wrap `this` in it, or return `this` unchanged."""
        if not self._match(TokenType.OFFSET):
            return this

        count = self._parse_number()
        self._match_set((TokenType.ROW, TokenType.ROWS))
        return self.expression(exp.Offset, this=this, expression=count)

    def _parse_locks(self) -> t.List[exp.Lock]:
        """Parse zero or more locking clauses: FOR UPDATE / FOR SHARE / LOCK IN SHARE MODE."""
        locks = []
        while True:
            if self._match_text_seq("FOR", "UPDATE"):
                update = True
            elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq(
                "LOCK", "IN", "SHARE", "MODE"
            ):
                update = False
            else:
                break

            expressions = None
            if self._match_text_seq("OF"):
                expressions = self._parse_csv(lambda: self._parse_table(schema=True))

            # wait: True = NOWAIT, False = SKIP LOCKED, expression = WAIT <n>.
            wait: t.Optional[bool | exp.Expression] = None
            if self._match_text_seq("NOWAIT"):
                wait = True
            elif self._match_text_seq("WAIT"):
                wait = self._parse_primary()
            elif self._match_text_seq("SKIP", "LOCKED"):
                wait = False

            locks.append(
                self.expression(exp.Lock, update=update, expressions=expressions, wait=wait)
            )

        return locks

    def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse UNION/EXCEPT/INTERSECT chains, recursing on the right-hand side."""
        if not self._match_set(self.SET_OPERATIONS):
            return this

        token_type = self._prev.token_type

        if token_type == TokenType.UNION:
            expression = exp.Union
        elif token_type == TokenType.EXCEPT:
            expression = exp.Except
        else:
            expression = exp.Intersect

        return self.expression(
            expression,
            this=this,
            # DISTINCT is the default; an explicit ALL turns it off.
            distinct=self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL),
            expression=self._parse_set_operations(self._parse_select(nested=True)),
        )

    def _parse_expression(self) -> t.Optional[exp.Expression]:
        """Parse a full scalar expression, including an optional alias."""
        return self._parse_alias(self._parse_conjunction())

    def _parse_conjunction(self) -> t.Optional[exp.Expression]:
        """Parse AND/OR-connected expressions (lowest scalar precedence level)."""
        return self._parse_tokens(self._parse_equality, self.CONJUNCTION)

    def _parse_equality(self) -> t.Optional[exp.Expression]:
        """Parse equality operators (=, <>, ...)."""
        return self._parse_tokens(self._parse_comparison, self.EQUALITY)

    def _parse_comparison(self) -> t.Optional[exp.Expression]:
        """Parse comparison operators (<, >, <=, >=)."""
        return self._parse_tokens(self._parse_range, self.COMPARISON)

    def _parse_range(self) -> t.Optional[exp.Expression]:
        """Parse range-style predicates: BETWEEN/IN/LIKE/etc., ISNULL/NOTNULL, IS."""
        this = self._parse_bitwise()
        negate = self._match(TokenType.NOT)

        if self._match_set(self.RANGE_PARSERS):
            expression = self.RANGE_PARSERS[self._prev.token_type](self, this)
            if not expression:
                return this

            this = expression
        elif self._match(TokenType.ISNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())

        # Postgres supports ISNULL and NOTNULL for conditions.
2767 # https://blog.andreiavram.ro/postgresql-null-composite-type/ 2768 if self._match(TokenType.NOTNULL): 2769 this = self.expression(exp.Is, this=this, expression=exp.Null()) 2770 this = self.expression(exp.Not, this=this) 2771 2772 if negate: 2773 this = self.expression(exp.Not, this=this) 2774 2775 if self._match(TokenType.IS): 2776 this = self._parse_is(this) 2777 2778 return this 2779 2780 def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 2781 index = self._index - 1 2782 negate = self._match(TokenType.NOT) 2783 2784 if self._match_text_seq("DISTINCT", "FROM"): 2785 klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ 2786 return self.expression(klass, this=this, expression=self._parse_expression()) 2787 2788 expression = self._parse_null() or self._parse_boolean() 2789 if not expression: 2790 self._retreat(index) 2791 return None 2792 2793 this = self.expression(exp.Is, this=this, expression=expression) 2794 return self.expression(exp.Not, this=this) if negate else this 2795 2796 def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In: 2797 unnest = self._parse_unnest(with_alias=False) 2798 if unnest: 2799 this = self.expression(exp.In, this=this, unnest=unnest) 2800 elif self._match(TokenType.L_PAREN): 2801 expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias)) 2802 2803 if len(expressions) == 1 and isinstance(expressions[0], exp.Subqueryable): 2804 this = self.expression(exp.In, this=this, query=expressions[0]) 2805 else: 2806 this = self.expression(exp.In, this=this, expressions=expressions) 2807 2808 self._match_r_paren(this) 2809 else: 2810 this = self.expression(exp.In, this=this, field=self._parse_field()) 2811 2812 return this 2813 2814 def _parse_between(self, this: exp.Expression) -> exp.Between: 2815 low = self._parse_bitwise() 2816 self._match(TokenType.AND) 2817 high = self._parse_bitwise() 2818 return self.expression(exp.Between, this=this, low=low, 
 high=high)

    def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Wrap `this` in an ESCAPE clause if one follows."""
        if not self._match(TokenType.ESCAPE):
            return this
        return self.expression(exp.Escape, this=this, expression=self._parse_string())

    def _parse_interval(self) -> t.Optional[exp.Interval]:
        """Parse an INTERVAL literal, normalizing it to INTERVAL '<n>' <unit> form."""
        if not self._match(TokenType.INTERVAL):
            return None

        this = self._parse_primary() or self._parse_term()
        unit = self._parse_function() or self._parse_var()

        # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse
        # each INTERVAL expression into this canonical form so it's easy to transpile
        if this and this.is_number:
            this = exp.Literal.string(this.name)
        elif this and this.is_string:
            parts = this.name.split()

            if len(parts) == 2:
                if unit:
                    # this is not actually a unit, it's something else
                    unit = None
                    self._retreat(self._index - 1)
                else:
                    # e.g. INTERVAL '5 day' -> INTERVAL '5' day
                    this = exp.Literal.string(parts[0])
                    unit = self.expression(exp.Var, this=parts[1])

        return self.expression(exp.Interval, this=this, unit=unit)

    def _parse_bitwise(self) -> t.Optional[exp.Expression]:
        """Parse bitwise operators, including << and >> spelled as token pairs."""
        this = self._parse_term()

        while True:
            if self._match_set(self.BITWISE):
                this = self.expression(
                    self.BITWISE[self._prev.token_type], this=this, expression=self._parse_term()
                )
            elif self._match_pair(TokenType.LT, TokenType.LT):
                this = self.expression(
                    exp.BitwiseLeftShift, this=this, expression=self._parse_term()
                )
            elif self._match_pair(TokenType.GT, TokenType.GT):
                this = self.expression(
                    exp.BitwiseRightShift, this=this, expression=self._parse_term()
                )
            else:
                break

        return this

    def _parse_term(self) -> t.Optional[exp.Expression]:
        """Parse additive-level operators (TERM precedence)."""
        return self._parse_tokens(self._parse_factor, self.TERM)

    def _parse_factor(self) -> t.Optional[exp.Expression]:
        """Parse multiplicative-level operators (FACTOR precedence)."""
        return
 self._parse_tokens(self._parse_unary, self.FACTOR)

    def _parse_unary(self) -> t.Optional[exp.Expression]:
        """Parse unary prefix operators, then AT TIME ZONE / type expressions."""
        if self._match_set(self.UNARY_PARSERS):
            return self.UNARY_PARSERS[self._prev.token_type](self)
        return self._parse_at_time_zone(self._parse_type())

    def _parse_type(self) -> t.Optional[exp.Expression]:
        """Parse an expression that may start with a type: INTERVAL, a cast like INT '5', or a column."""
        interval = self._parse_interval()
        if interval:
            return interval

        index = self._index
        data_type = self._parse_types(check_func=True)
        this = self._parse_column()

        if data_type:
            if isinstance(this, exp.Literal):
                # "<type> '<literal>'" is a cast; dialects may special-case some types.
                parser = self.TYPE_LITERAL_PARSERS.get(data_type.this)
                if parser:
                    return parser(self, this, data_type)
                return self.expression(exp.Cast, this=this, to=data_type)
            if not data_type.expressions:
                # A bare type name followed by a non-literal was misparsed — rewind.
                self._retreat(index)
                return self._parse_column()
            return self._parse_column_ops(data_type)

        return this

    def _parse_type_size(self) -> t.Optional[exp.DataTypeSize]:
        """Parse a type size argument, e.g. the 10 in VARCHAR(10), with optional qualifier."""
        this = self._parse_type()
        if not this:
            return None

        return self.expression(
            exp.DataTypeSize, this=this, expression=self._parse_var(any_token=True)
        )

    def _parse_types(
        self, check_func: bool = False, schema: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse a (possibly nested/parameterized) data type, or None and rewind on failure."""
        index = self._index

        # Teradata UDT prefix.
        prefix = self._match_text_seq("SYSUDTLIB", ".")

        if not self._match_set(self.TYPE_TOKENS):
            return None

        type_token = self._prev.token_type

        if type_token == TokenType.PSEUDO_TYPE:
            return self.expression(exp.PseudoType, this=self._prev.text)

        nested = type_token in self.NESTED_TYPE_TOKENS
        is_struct = type_token == TokenType.STRUCT
        expressions = None
        # Tracks whether this could still be a function call rather than a type.
        maybe_func = False

        if self._match(TokenType.L_PAREN):
            if is_struct:
                expressions = self._parse_csv(self._parse_struct_types)
            elif nested:
                expressions = self._parse_csv(
                    lambda:
 self._parse_types(check_func=check_func, schema=schema)
                )
            else:
                expressions = self._parse_csv(self._parse_type_size)

            if not expressions or not self._match(TokenType.R_PAREN):
                # Empty parens or unbalanced — this wasn't a type; rewind fully.
                self._retreat(index)
                return None

            maybe_func = True

        # Postfix array syntax: T[] (possibly repeated, e.g. T[][]).
        if self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET):
            this = exp.DataType(
                this=exp.DataType.Type.ARRAY,
                expressions=[exp.DataType.build(type_token.value, expressions=expressions)],
                nested=True,
            )

            while self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET):
                this = exp.DataType(this=exp.DataType.Type.ARRAY, expressions=[this], nested=True)

            return this

        if self._match(TokenType.L_BRACKET):
            # A lone "[" means this was an index expression, not a type.
            self._retreat(index)
            return None

        values: t.Optional[t.List[t.Optional[exp.Expression]]] = None
        if nested and self._match(TokenType.LT):
            # Generic-style nesting: ARRAY<...>, MAP<...>, STRUCT<...>.
            if is_struct:
                expressions = self._parse_csv(self._parse_struct_types)
            else:
                expressions = self._parse_csv(
                    lambda: self._parse_types(check_func=check_func, schema=schema)
                )

            if not self._match(TokenType.GT):
                self.raise_error("Expecting >")

            if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)):
                values = self._parse_csv(self._parse_conjunction)
                self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN))

        value: t.Optional[exp.Expression] = None
        if type_token in self.TIMESTAMPS:
            # Normalize WITH/WITHOUT TIME ZONE variants onto canonical timestamp types.
            if self._match_text_seq("WITH", "TIME", "ZONE") or type_token == TokenType.TIMESTAMPTZ:
                value = exp.DataType(this=exp.DataType.Type.TIMESTAMPTZ, expressions=expressions)
            elif (
                self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE")
                or type_token == TokenType.TIMESTAMPLTZ
            ):
                value = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions)
            elif self._match_text_seq("WITHOUT", "TIME", "ZONE"):
                if type_token == TokenType.TIME:
                    value =
 exp.DataType(this=exp.DataType.Type.TIME, expressions=expressions)
                else:
                    value = exp.DataType(this=exp.DataType.Type.TIMESTAMP, expressions=expressions)

            # A time-zone qualifier rules out this being a function call.
            maybe_func = maybe_func and value is None

            if value is None:
                value = exp.DataType(this=exp.DataType.Type.TIMESTAMP, expressions=expressions)
        elif type_token == TokenType.INTERVAL:
            unit = self._parse_var()

            if not unit:
                value = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL)
            else:
                value = self.expression(exp.Interval, unit=unit)

        if maybe_func and check_func:
            # Peek: if no string literal follows, treat "NAME(...)" as a function, not a type.
            index2 = self._index
            peek = self._parse_string()

            if not peek:
                self._retreat(index)
                return None

            self._retreat(index2)

        if value:
            return value

        return exp.DataType(
            this=exp.DataType.Type[type_token.value.upper()],
            expressions=expressions,
            nested=nested,
            values=values,
            prefix=prefix,
        )

    def _parse_struct_types(self) -> t.Optional[exp.Expression]:
        """Parse a struct field: <name>[:] <type> (colon form used by e.g. Hive)."""
        this = self._parse_type() or self._parse_id_var()
        self._match(TokenType.COLON)
        return self._parse_column_def(this)

    def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Wrap `this` in AT TIME ZONE <zone> if that clause follows."""
        if not self._match_text_seq("AT", "TIME", "ZONE"):
            return this
        return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary())

    def _parse_column(self) -> t.Optional[exp.Expression]:
        """Parse a column reference, promoting a bare identifier to a Column node."""
        this = self._parse_field()
        if isinstance(this, exp.Identifier):
            this = self.expression(exp.Column, this=this)
        elif not this:
            return self._parse_bracket(this)
        return self._parse_column_ops(this)

    def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse trailing column operators: ::cast, dots, brackets, JSON arrows, etc."""
        this = self._parse_bracket(this)

        while self._match_set(self.COLUMN_OPERATORS):
            op_token = self._prev.token_type
            op =
 self.COLUMN_OPERATORS.get(op_token)

            if op_token == TokenType.DCOLON:
                # "::" must be followed by a type.
                field = self._parse_types()
                if not field:
                    self.raise_error("Expected type")
            elif op and self._curr:
                self._advance()
                value = self._prev.text
                field = (
                    exp.Literal.number(value)
                    if self._prev.token_type == TokenType.NUMBER
                    else exp.Literal.string(value)
                )
            else:
                field = self._parse_field(anonymous_func=True, any_token=True)

            if isinstance(field, exp.Func):
                # bigquery allows function calls like x.y.count(...)
                # SAFE.SUBSTR(...)
                # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules
                this = self._replace_columns_with_dots(this)

            if op:
                this = op(self, this, field)
            elif isinstance(this, exp.Column) and not this.args.get("catalog"):
                # a.b.c — shift existing parts up one level (table->db->catalog).
                this = self.expression(
                    exp.Column,
                    this=field,
                    table=this.this,
                    db=this.args.get("table"),
                    catalog=this.args.get("db"),
                )
            else:
                this = self.expression(exp.Dot, this=this, expression=field)
            this = self._parse_bracket(this)
        return this

    def _parse_primary(self) -> t.Optional[exp.Expression]:
        """Parse a primary expression: literal, implicit-concat strings, .N floats, or parens."""
        if self._match_set(self.PRIMARY_PARSERS):
            token_type = self._prev.token_type
            primary = self.PRIMARY_PARSERS[token_type](self, self._prev)

            if token_type == TokenType.STRING:
                # Adjacent string literals concatenate implicitly ('a' 'b' -> 'ab').
                expressions = [primary]
                while self._match(TokenType.STRING):
                    expressions.append(exp.Literal.string(self._prev.text))

                if len(expressions) > 1:
                    return self.expression(exp.Concat, expressions=expressions)

            return primary

        if self._match_pair(TokenType.DOT, TokenType.NUMBER):
            # Leading-dot float literal, e.g. ".5".
            return exp.Literal.number(f"0.{self._prev.text}")

        if self._match(TokenType.L_PAREN):
            comments = self._prev_comments
            query = self._parse_select()

            if query:
                expressions = [query]
            else:
                expressions =
 self._parse_csv(self._parse_expression)

            this = self._parse_query_modifiers(seq_get(expressions, 0))

            if isinstance(this, exp.Subqueryable):
                this = self._parse_set_operations(
                    self._parse_subquery(this=this, parse_alias=False)
                )
            elif len(expressions) > 1:
                # (a, b, ...) — a tuple, not a parenthesized expression.
                this = self.expression(exp.Tuple, expressions=expressions)
            else:
                this = self.expression(exp.Paren, this=self._parse_set_operations(this))

            if this:
                this.add_comments(comments)

            self._match_r_paren(expression=this)
            return this

        return None

    def _parse_field(
        self,
        any_token: bool = False,
        tokens: t.Optional[t.Collection[TokenType]] = None,
        anonymous_func: bool = False,
    ) -> t.Optional[exp.Expression]:
        """Parse a field: a primary literal, a function call, or an identifier."""
        return (
            self._parse_primary()
            or self._parse_function(anonymous=anonymous_func)
            or self._parse_id_var(any_token=any_token, tokens=tokens)
        )

    def _parse_function(
        self,
        functions: t.Optional[t.Dict[str, t.Callable]] = None,
        anonymous: bool = False,
        optional_parens: bool = True,
    ) -> t.Optional[exp.Expression]:
        """Parse a function call; `anonymous` skips known-function validation."""
        if not self._curr:
            return None

        token_type = self._curr.token_type

        if optional_parens and self._match_set(self.NO_PAREN_FUNCTION_PARSERS):
            return self.NO_PAREN_FUNCTION_PARSERS[token_type](self)

        if not self._next or self._next.token_type != TokenType.L_PAREN:
            # No parens — only paren-less builtins like CURRENT_DATE qualify.
            if optional_parens and token_type in self.NO_PAREN_FUNCTIONS:
                self._advance()
                return self.expression(self.NO_PAREN_FUNCTIONS[token_type])

            return None

        if token_type not in self.FUNC_TOKENS:
            return None

        this = self._curr.text
        upper = this.upper()
        # Consume the function name and the opening paren.
        self._advance(2)

        parser = self.FUNCTION_PARSERS.get(upper)

        if parser and not anonymous:
            this = parser(self)
        else:
            subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type)

            if
 subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH):
                # EXISTS(SELECT ...) / ANY(SELECT ...) style predicates.
                this = self.expression(subquery_predicate, this=self._parse_select())
                self._match_r_paren()
                return this

            if functions is None:
                functions = self.FUNCTIONS

            function = functions.get(upper)

            alias = upper in self.FUNCTIONS_WITH_ALIASED_ARGS
            args = self._parse_csv(lambda: self._parse_lambda(alias=alias))

            if function and not anonymous:
                # Known function: build its typed node and validate arity/args.
                this = self.validate_expression(function(args), args)
            else:
                this = self.expression(exp.Anonymous, this=this, expressions=args)

        self._match_r_paren(this)
        return self._parse_window(this)

    def _parse_function_parameter(self) -> t.Optional[exp.Expression]:
        """Parse one parameter in a function/UDF signature (name plus optional type)."""
        return self._parse_column_def(self._parse_id_var())

    def _parse_user_defined_function(
        self, kind: t.Optional[TokenType] = None
    ) -> t.Optional[exp.Expression]:
        """Parse a possibly dotted UDF name with an optional parameter list."""
        this = self._parse_id_var()

        while self._match(TokenType.DOT):
            this = self.expression(exp.Dot, this=this, expression=self._parse_id_var())

        if not self._match(TokenType.L_PAREN):
            return this

        expressions = self._parse_csv(self._parse_function_parameter)
        self._match_r_paren()
        return self.expression(
            exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True
        )

    def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier:
        """Parse a charset introducer (e.g. _utf8'abc'); fall back to a bare identifier."""
        literal = self._parse_primary()
        if literal:
            return self.expression(exp.Introducer, this=token.text, expression=literal)

        return self.expression(exp.Identifier, this=token.text)

    def _parse_session_parameter(self) -> exp.SessionParameter:
        """Parse a session parameter reference, optionally qualified as <kind>.<name>."""
        kind = None
        this = self._parse_id_var() or self._parse_primary()

        if this and self._match(TokenType.DOT):
            kind = this.name
            this = self._parse_var() or self._parse_primary()

        return
 self.expression(exp.SessionParameter, this=this, kind=kind)

    def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]:
        """Parse a lambda ((x, y) -> expr) if present; otherwise a normal argument expression."""
        index = self._index

        if self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(self._parse_id_var)

            if not self._match(TokenType.R_PAREN):
                self._retreat(index)
        else:
            expressions = [self._parse_id_var()]

        if self._match_set(self.LAMBDAS):
            return self.LAMBDAS[self._prev.token_type](self, expressions)

        # Not a lambda after all — rewind and parse as a regular expression.
        self._retreat(index)

        this: t.Optional[exp.Expression]

        if self._match(TokenType.DISTINCT):
            this = self.expression(
                exp.Distinct, expressions=self._parse_csv(self._parse_conjunction)
            )
        else:
            this = self._parse_select_or_expression(alias=alias)

        if isinstance(this, exp.EQ):
            # In kwarg-style calls (name = value) the left side is a plain
            # variable, not a column reference.
            left = this.this
            if isinstance(left, exp.Column):
                left.replace(exp.var(left.text("this")))

        return self._parse_limit(self._parse_order(self._parse_respect_or_ignore_nulls(this)))

    def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        """Parse a parenthesized schema (column/constraint defs); passes through subqueries."""
        index = self._index

        if not self.errors:
            # Speculatively try a nested SELECT; discard any errors it produced
            # and rewind regardless, since this was only a lookahead.
            try:
                if self._parse_select(nested=True):
                    return this
            except ParseError:
                pass
            finally:
                self.errors.clear()
                self._retreat(index)

        if not self._match(TokenType.L_PAREN):
            return this

        args = self._parse_csv(
            lambda: self._parse_constraint()
            or self._parse_column_def(self._parse_field(any_token=True))
        )

        self._match_r_paren()
        return self.expression(exp.Schema, this=this, expressions=args)

    def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse a column definition: name, optional type, and trailing constraints."""
        # column defs are not really columns, they're identifiers
        if isinstance(this, exp.Column):
            this = this.this

        kind = self._parse_types(schema=True)

        if
 self._match_text_seq("FOR", "ORDINALITY"):
            return self.expression(exp.ColumnDef, this=this, ordinality=True)

        constraints = []
        while True:
            constraint = self._parse_column_constraint()
            if not constraint:
                break
            constraints.append(constraint)

        # A bare identifier (no type, no constraints) is not a column def.
        if not kind and not constraints:
            return this

        return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints)

    def _parse_auto_increment(
        self,
    ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint:
        """Parse AUTO_INCREMENT, optionally with (start, increment) or START/INCREMENT."""
        start = None
        increment = None

        if self._match(TokenType.L_PAREN, advance=False):
            args = self._parse_wrapped_csv(self._parse_bitwise)
            start = seq_get(args, 0)
            increment = seq_get(args, 1)
        elif self._match_text_seq("START"):
            start = self._parse_bitwise()
            self._match_text_seq("INCREMENT")
            increment = self._parse_bitwise()

        # With explicit start/increment this is identity-style generation.
        if start and increment:
            return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment)

        return exp.AutoIncrementColumnConstraint()

    def _parse_compress(self) -> exp.CompressColumnConstraint:
        """Parse a COMPRESS column constraint with either a wrapped list or one value."""
        if self._match(TokenType.L_PAREN, advance=False):
            return self.expression(
                exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise)
            )

        return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise())

    def _parse_generated_as_identity(self) -> exp.GeneratedAsIdentityColumnConstraint:
        """Parse GENERATED {ALWAYS | BY DEFAULT} AS IDENTITY [(options)]."""
        if self._match_text_seq("BY", "DEFAULT"):
            on_null = self._match_pair(TokenType.ON, TokenType.NULL)
            this = self.expression(
                exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null
            )
        else:
            self._match_text_seq("ALWAYS")
            this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True)

        self._match(TokenType.ALIAS)
        identity = self._match_text_seq("IDENTITY")

        if
 self._match(TokenType.L_PAREN):
            if self._match_text_seq("START", "WITH"):
                this.set("start", self._parse_bitwise())
            if self._match_text_seq("INCREMENT", "BY"):
                this.set("increment", self._parse_bitwise())
            if self._match_text_seq("MINVALUE"):
                this.set("minvalue", self._parse_bitwise())
            if self._match_text_seq("MAXVALUE"):
                this.set("maxvalue", self._parse_bitwise())

            if self._match_text_seq("CYCLE"):
                this.set("cycle", True)
            elif self._match_text_seq("NO", "CYCLE"):
                this.set("cycle", False)

            if not identity:
                # GENERATED ... AS (<expression>) — a computed column, not identity.
                this.set("expression", self._parse_bitwise())

            self._match_r_paren()

        return this

    def _parse_inline(self) -> exp.InlineLengthColumnConstraint:
        """Parse an INLINE LENGTH column constraint."""
        self._match_text_seq("LENGTH")
        return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise())

    def _parse_not_constraint(
        self,
    ) -> t.Optional[exp.NotNullColumnConstraint | exp.CaseSpecificColumnConstraint]:
        """Parse the tail of a NOT constraint: NOT NULL or NOT CASESPECIFIC."""
        if self._match_text_seq("NULL"):
            return self.expression(exp.NotNullColumnConstraint)
        if self._match_text_seq("CASESPECIFIC"):
            return self.expression(exp.CaseSpecificColumnConstraint, not_=True)
        return None

    def _parse_column_constraint(self) -> t.Optional[exp.Expression]:
        """Parse one (optionally named) column constraint, dispatching on keyword."""
        if self._match(TokenType.CONSTRAINT):
            this = self._parse_id_var()
        else:
            this = None

        if self._match_texts(self.CONSTRAINT_PARSERS):
            return self.expression(
                exp.ColumnConstraint,
                this=this,
                kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self),
            )

        return this

    def _parse_constraint(self) -> t.Optional[exp.Expression]:
        """Parse a table constraint: named CONSTRAINT ... or an unnamed schema constraint."""
        if not self._match(TokenType.CONSTRAINT):
            return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS)

        this = self._parse_id_var()
        expressions = []

        while True:
            constraint = self._parse_unnamed_constraint() or
self._parse_function() 3419 if not constraint: 3420 break 3421 expressions.append(constraint) 3422 3423 return self.expression(exp.Constraint, this=this, expressions=expressions) 3424 3425 def _parse_unnamed_constraint( 3426 self, constraints: t.Optional[t.Collection[str]] = None 3427 ) -> t.Optional[exp.Expression]: 3428 if not self._match_texts(constraints or self.CONSTRAINT_PARSERS): 3429 return None 3430 3431 constraint = self._prev.text.upper() 3432 if constraint not in self.CONSTRAINT_PARSERS: 3433 self.raise_error(f"No parser found for schema constraint {constraint}.") 3434 3435 return self.CONSTRAINT_PARSERS[constraint](self) 3436 3437 def _parse_unique(self) -> exp.UniqueColumnConstraint: 3438 self._match_text_seq("KEY") 3439 return self.expression( 3440 exp.UniqueColumnConstraint, this=self._parse_schema(self._parse_id_var(any_token=False)) 3441 ) 3442 3443 def _parse_key_constraint_options(self) -> t.List[str]: 3444 options = [] 3445 while True: 3446 if not self._curr: 3447 break 3448 3449 if self._match(TokenType.ON): 3450 action = None 3451 on = self._advance_any() and self._prev.text 3452 3453 if self._match_text_seq("NO", "ACTION"): 3454 action = "NO ACTION" 3455 elif self._match_text_seq("CASCADE"): 3456 action = "CASCADE" 3457 elif self._match_pair(TokenType.SET, TokenType.NULL): 3458 action = "SET NULL" 3459 elif self._match_pair(TokenType.SET, TokenType.DEFAULT): 3460 action = "SET DEFAULT" 3461 else: 3462 self.raise_error("Invalid key constraint") 3463 3464 options.append(f"ON {on} {action}") 3465 elif self._match_text_seq("NOT", "ENFORCED"): 3466 options.append("NOT ENFORCED") 3467 elif self._match_text_seq("DEFERRABLE"): 3468 options.append("DEFERRABLE") 3469 elif self._match_text_seq("INITIALLY", "DEFERRED"): 3470 options.append("INITIALLY DEFERRED") 3471 elif self._match_text_seq("NORELY"): 3472 options.append("NORELY") 3473 elif self._match_text_seq("MATCH", "FULL"): 3474 options.append("MATCH FULL") 3475 else: 3476 break 3477 3478 return 
options 3479 3480 def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]: 3481 if match and not self._match(TokenType.REFERENCES): 3482 return None 3483 3484 expressions = None 3485 this = self._parse_id_var() 3486 3487 if self._match(TokenType.L_PAREN, advance=False): 3488 expressions = self._parse_wrapped_id_vars() 3489 3490 options = self._parse_key_constraint_options() 3491 return self.expression(exp.Reference, this=this, expressions=expressions, options=options) 3492 3493 def _parse_foreign_key(self) -> exp.ForeignKey: 3494 expressions = self._parse_wrapped_id_vars() 3495 reference = self._parse_references() 3496 options = {} 3497 3498 while self._match(TokenType.ON): 3499 if not self._match_set((TokenType.DELETE, TokenType.UPDATE)): 3500 self.raise_error("Expected DELETE or UPDATE") 3501 3502 kind = self._prev.text.lower() 3503 3504 if self._match_text_seq("NO", "ACTION"): 3505 action = "NO ACTION" 3506 elif self._match(TokenType.SET): 3507 self._match_set((TokenType.NULL, TokenType.DEFAULT)) 3508 action = "SET " + self._prev.text.upper() 3509 else: 3510 self._advance() 3511 action = self._prev.text.upper() 3512 3513 options[kind] = action 3514 3515 return self.expression( 3516 exp.ForeignKey, expressions=expressions, reference=reference, **options # type: ignore 3517 ) 3518 3519 def _parse_primary_key( 3520 self, wrapped_optional: bool = False, in_props: bool = False 3521 ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey: 3522 desc = ( 3523 self._match_set((TokenType.ASC, TokenType.DESC)) 3524 and self._prev.token_type == TokenType.DESC 3525 ) 3526 3527 if not in_props and not self._match(TokenType.L_PAREN, advance=False): 3528 return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc) 3529 3530 expressions = self._parse_wrapped_csv(self._parse_field, optional=wrapped_optional) 3531 options = self._parse_key_constraint_options() 3532 return self.expression(exp.PrimaryKey, expressions=expressions, options=options) 3533 3534 def 
_parse_bracket(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3535 if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)): 3536 return this 3537 3538 bracket_kind = self._prev.token_type 3539 3540 if self._match(TokenType.COLON): 3541 expressions: t.List[t.Optional[exp.Expression]] = [ 3542 self.expression(exp.Slice, expression=self._parse_conjunction()) 3543 ] 3544 else: 3545 expressions = self._parse_csv(lambda: self._parse_slice(self._parse_conjunction())) 3546 3547 # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs 3548 if bracket_kind == TokenType.L_BRACE: 3549 this = self.expression(exp.Struct, expressions=expressions) 3550 elif not this or this.name.upper() == "ARRAY": 3551 this = self.expression(exp.Array, expressions=expressions) 3552 else: 3553 expressions = apply_index_offset(this, expressions, -self.INDEX_OFFSET) 3554 this = self.expression(exp.Bracket, this=this, expressions=expressions) 3555 3556 if not self._match(TokenType.R_BRACKET) and bracket_kind == TokenType.L_BRACKET: 3557 self.raise_error("Expected ]") 3558 elif not self._match(TokenType.R_BRACE) and bracket_kind == TokenType.L_BRACE: 3559 self.raise_error("Expected }") 3560 3561 self._add_comments(this) 3562 return self._parse_bracket(this) 3563 3564 def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3565 if self._match(TokenType.COLON): 3566 return self.expression(exp.Slice, this=this, expression=self._parse_conjunction()) 3567 return this 3568 3569 def _parse_case(self) -> t.Optional[exp.Expression]: 3570 ifs = [] 3571 default = None 3572 3573 expression = self._parse_conjunction() 3574 3575 while self._match(TokenType.WHEN): 3576 this = self._parse_conjunction() 3577 self._match(TokenType.THEN) 3578 then = self._parse_conjunction() 3579 ifs.append(self.expression(exp.If, this=this, true=then)) 3580 3581 if self._match(TokenType.ELSE): 3582 default = self._parse_conjunction() 3583 3584 if not 
self._match(TokenType.END): 3585 self.raise_error("Expected END after CASE", self._prev) 3586 3587 return self._parse_window( 3588 self.expression(exp.Case, this=expression, ifs=ifs, default=default) 3589 ) 3590 3591 def _parse_if(self) -> t.Optional[exp.Expression]: 3592 if self._match(TokenType.L_PAREN): 3593 args = self._parse_csv(self._parse_conjunction) 3594 this = self.validate_expression(exp.If.from_arg_list(args), args) 3595 self._match_r_paren() 3596 else: 3597 index = self._index - 1 3598 condition = self._parse_conjunction() 3599 3600 if not condition: 3601 self._retreat(index) 3602 return None 3603 3604 self._match(TokenType.THEN) 3605 true = self._parse_conjunction() 3606 false = self._parse_conjunction() if self._match(TokenType.ELSE) else None 3607 self._match(TokenType.END) 3608 this = self.expression(exp.If, this=condition, true=true, false=false) 3609 3610 return self._parse_window(this) 3611 3612 def _parse_extract(self) -> exp.Extract: 3613 this = self._parse_function() or self._parse_var() or self._parse_type() 3614 3615 if self._match(TokenType.FROM): 3616 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 3617 3618 if not self._match(TokenType.COMMA): 3619 self.raise_error("Expected FROM or comma after EXTRACT", self._prev) 3620 3621 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 3622 3623 def _parse_cast(self, strict: bool) -> exp.Expression: 3624 this = self._parse_conjunction() 3625 3626 if not self._match(TokenType.ALIAS): 3627 if self._match(TokenType.COMMA): 3628 return self.expression( 3629 exp.CastToStrType, this=this, expression=self._parse_string() 3630 ) 3631 else: 3632 self.raise_error("Expected AS after CAST") 3633 3634 to = self._parse_types() 3635 3636 if not to: 3637 self.raise_error("Expected TYPE after CAST") 3638 elif to.this == exp.DataType.Type.CHAR: 3639 if self._match(TokenType.CHARACTER_SET): 3640 to = self.expression(exp.CharacterSet, 
this=self._parse_var_or_string()) 3641 elif to.this in exp.DataType.TEMPORAL_TYPES and self._match(TokenType.FORMAT): 3642 fmt = self._parse_string() 3643 3644 return self.expression( 3645 exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime, 3646 this=this, 3647 format=exp.Literal.string( 3648 format_time( 3649 fmt.this if fmt else "", 3650 self.FORMAT_MAPPING or self.TIME_MAPPING, 3651 self.FORMAT_TRIE or self.TIME_TRIE, 3652 ) 3653 ), 3654 ) 3655 3656 return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to) 3657 3658 def _parse_concat(self) -> t.Optional[exp.Expression]: 3659 args = self._parse_csv(self._parse_conjunction) 3660 if self.CONCAT_NULL_OUTPUTS_STRING: 3661 args = [exp.func("COALESCE", arg, exp.Literal.string("")) for arg in args] 3662 3663 # Some dialects (e.g. Trino) don't allow a single-argument CONCAT call, so when 3664 # we find such a call we replace it with its argument. 3665 if len(args) == 1: 3666 return args[0] 3667 3668 return self.expression( 3669 exp.Concat if self.STRICT_STRING_CONCAT else exp.SafeConcat, expressions=args 3670 ) 3671 3672 def _parse_string_agg(self) -> exp.Expression: 3673 expression: t.Optional[exp.Expression] 3674 3675 if self._match(TokenType.DISTINCT): 3676 args = self._parse_csv(self._parse_conjunction) 3677 expression = self.expression(exp.Distinct, expressions=[seq_get(args, 0)]) 3678 else: 3679 args = self._parse_csv(self._parse_conjunction) 3680 expression = seq_get(args, 0) 3681 3682 index = self._index 3683 if not self._match(TokenType.R_PAREN): 3684 # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]]) 3685 order = self._parse_order(this=expression) 3686 return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1)) 3687 3688 # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]). 
        # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that
        # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them.
        if not self._match_text_seq("WITHIN", "GROUP"):
            self._retreat(index)
            return self.validate_expression(exp.GroupConcat.from_arg_list(args), args)

        self._match_l_paren()  # The corresponding match_r_paren will be called in parse_function (caller)
        order = self._parse_order(this=expression)
        return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1))

    def _parse_convert(self, strict: bool) -> t.Optional[exp.Expression]:
        """Parse CONVERT(expr, type) / CONVERT(expr USING charset) into a (Try)Cast."""
        to: t.Optional[exp.Expression]
        this = self._parse_bitwise()

        if self._match(TokenType.USING):
            to = self.expression(exp.CharacterSet, this=self._parse_var())
        elif self._match(TokenType.COMMA):
            to = self._parse_bitwise()
        else:
            to = None

        # Swap the argument order if needed to produce the correct AST
        if self.CONVERT_TYPE_FIRST:
            this, to = to, this

        return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to)

    def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]:
        """
        There are generally two variants of the DECODE function:

        - DECODE(bin, charset)
        - DECODE(expression, search, result [, search, result] ... [, default])

        The second variant will always be parsed into a CASE expression. Note that NULL
        needs special treatment, since we need to explicitly check for it with `IS NULL`,
        instead of relying on pattern matching.
        """
        args = self._parse_csv(self._parse_conjunction)

        if len(args) < 3:
            return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1))

        expression, *expressions = args
        if not expression:
            return None

        ifs = []
        for search, result in zip(expressions[::2], expressions[1::2]):
            if not search or not result:
                return None

            if isinstance(search, exp.Literal):
                ifs.append(
                    exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result)
                )
            elif isinstance(search, exp.Null):
                ifs.append(
                    exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result)
                )
            else:
                # Non-literal search value: it could evaluate to NULL at runtime, so the
                # branch also fires when both the operand and the search value are NULL.
                cond = exp.or_(
                    exp.EQ(this=expression.copy(), expression=search),
                    exp.and_(
                        exp.Is(this=expression.copy(), expression=exp.Null()),
                        exp.Is(this=search.copy(), expression=exp.Null()),
                        copy=False,
                    ),
                    copy=False,
                )
                ifs.append(exp.If(this=cond, true=result))

        return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None)

    def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]:
        """Parse one `[KEY] key [: | VALUE] value` pair inside JSON_OBJECT."""
        self._match_text_seq("KEY")
        key = self._parse_field()
        self._match(TokenType.COLON)
        self._match_text_seq("VALUE")
        value = self._parse_field()

        if not key and not value:
            return None
        return self.expression(exp.JSONKeyValue, this=key, expression=value)

    def _parse_json_object(self) -> exp.JSONObject:
        """Parse JSON_OBJECT(...) with its NULL handling, UNIQUE KEYS, RETURNING,
        FORMAT JSON and ENCODING clauses."""
        star = self._parse_star()
        expressions = [star] if star else self._parse_csv(self._parse_json_key_value)

        null_handling = None
        if self._match_text_seq("NULL", "ON", "NULL"):
            null_handling = "NULL ON NULL"
        elif self._match_text_seq("ABSENT", "ON", "NULL"):
            null_handling = "ABSENT ON NULL"

        unique_keys = None
        if self._match_text_seq("WITH", "UNIQUE"):
            unique_keys = True
        elif self._match_text_seq("WITHOUT", "UNIQUE"):
            unique_keys = False

        self._match_text_seq("KEYS")

        return_type = self._match_text_seq("RETURNING") and self._parse_type()
        format_json = self._match_text_seq("FORMAT", "JSON")
        encoding = self._match_text_seq("ENCODING") and self._parse_var()

        return self.expression(
            exp.JSONObject,
            expressions=expressions,
            null_handling=null_handling,
            unique_keys=unique_keys,
            return_type=return_type,
            format_json=format_json,
            encoding=encoding,
        )

    def _parse_logarithm(self) -> exp.Func:
        """Parse LOG arguments, normalizing base order and defaulting to LN where the
        dialect treats single-argument LOG as the natural logarithm."""
        # Default argument order is base, expression
        args = self._parse_csv(self._parse_range)

        if len(args) > 1:
            if not self.LOG_BASE_FIRST:
                args.reverse()
            return exp.Log.from_arg_list(args)

        return self.expression(
            exp.Ln if self.LOG_DEFAULTS_TO_LN else exp.Log, this=seq_get(args, 0)
        )

    def _parse_match_against(self) -> exp.MatchAgainst:
        """Parse MySQL's MATCH (cols) AGAINST (expr [search modifier])."""
        expressions = self._parse_csv(self._parse_column)

        self._match_text_seq(")", "AGAINST", "(")

        this = self._parse_string()

        if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"):
            modifier = "IN NATURAL LANGUAGE MODE"
            if self._match_text_seq("WITH", "QUERY", "EXPANSION"):
                modifier = f"{modifier} WITH QUERY EXPANSION"
        elif self._match_text_seq("IN", "BOOLEAN", "MODE"):
            modifier = "IN BOOLEAN MODE"
        elif self._match_text_seq("WITH", "QUERY", "EXPANSION"):
            modifier = "WITH QUERY EXPANSION"
        else:
            modifier = None

        return self.expression(
            exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier
        )

    # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16
    def _parse_open_json(self) -> exp.OpenJSON:
        """Parse T-SQL's OPENJSON(expr [, path]) [WITH (column defs)]."""
        this = self._parse_bitwise()
        path = self._match(TokenType.COMMA) and self._parse_string()

        def _parse_open_json_column_def() -> exp.OpenJSONColumnDef:
            # One `name type [path] [AS JSON]` entry of the WITH clause.
            this = self._parse_field(any_token=True)
            kind = self._parse_types()
            path = self._parse_string()
            as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON)

            return self.expression(
                exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json
            )

        expressions = None
        if self._match_pair(TokenType.R_PAREN, TokenType.WITH):
            self._match_l_paren()
            expressions = self._parse_csv(_parse_open_json_column_def)

        return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions)

    def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition:
        """Parse POSITION/LOCATE-style arguments; `haystack_first` selects argument order
        for the comma-separated form, and `needle IN haystack` is handled explicitly."""
        args = self._parse_csv(self._parse_bitwise)

        if self._match(TokenType.IN):
            return self.expression(
                exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0)
            )

        if haystack_first:
            haystack = seq_get(args, 0)
            needle = seq_get(args, 1)
        else:
            needle = seq_get(args, 0)
            haystack = seq_get(args, 1)

        return self.expression(
            exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2)
        )

    def _parse_join_hint(self, func_name: str) -> exp.JoinHint:
        """Parse a join hint's table list into a JoinHint node named `func_name`."""
        args = self._parse_csv(self._parse_table)
        return exp.JoinHint(this=func_name.upper(), expressions=args)

    def _parse_substring(self) -> exp.Substring:
        """Parse SUBSTRING arguments, including Postgres' FROM/FOR keyword form."""
        # Postgres supports the form: substring(string [from int] [for int])
        # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6

        args = self._parse_csv(self._parse_bitwise)

        if self._match(TokenType.FROM):
            args.append(self._parse_bitwise())
            if self._match(TokenType.FOR):
                args.append(self._parse_bitwise())

        return self.validate_expression(exp.Substring.from_arg_list(args), args)

    def _parse_trim(self) -> exp.Trim:
        """Parse TRIM([position] [chars FROM] expr [COLLATE c])."""
        # https://www.w3resource.com/sql/character-functions/trim.php
# https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html 3902 3903 position = None 3904 collation = None 3905 3906 if self._match_texts(self.TRIM_TYPES): 3907 position = self._prev.text.upper() 3908 3909 expression = self._parse_bitwise() 3910 if self._match_set((TokenType.FROM, TokenType.COMMA)): 3911 this = self._parse_bitwise() 3912 else: 3913 this = expression 3914 expression = None 3915 3916 if self._match(TokenType.COLLATE): 3917 collation = self._parse_bitwise() 3918 3919 return self.expression( 3920 exp.Trim, this=this, position=position, expression=expression, collation=collation 3921 ) 3922 3923 def _parse_window_clause(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]: 3924 return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window) 3925 3926 def _parse_named_window(self) -> t.Optional[exp.Expression]: 3927 return self._parse_window(self._parse_id_var(), alias=True) 3928 3929 def _parse_respect_or_ignore_nulls( 3930 self, this: t.Optional[exp.Expression] 3931 ) -> t.Optional[exp.Expression]: 3932 if self._match_text_seq("IGNORE", "NULLS"): 3933 return self.expression(exp.IgnoreNulls, this=this) 3934 if self._match_text_seq("RESPECT", "NULLS"): 3935 return self.expression(exp.RespectNulls, this=this) 3936 return this 3937 3938 def _parse_window( 3939 self, this: t.Optional[exp.Expression], alias: bool = False 3940 ) -> t.Optional[exp.Expression]: 3941 if self._match_pair(TokenType.FILTER, TokenType.L_PAREN): 3942 this = self.expression(exp.Filter, this=this, expression=self._parse_where()) 3943 self._match_r_paren() 3944 3945 # T-SQL allows the OVER (...) syntax after WITHIN GROUP. 
3946 # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16 3947 if self._match_text_seq("WITHIN", "GROUP"): 3948 order = self._parse_wrapped(self._parse_order) 3949 this = self.expression(exp.WithinGroup, this=this, expression=order) 3950 3951 # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER 3952 # Some dialects choose to implement and some do not. 3953 # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html 3954 3955 # There is some code above in _parse_lambda that handles 3956 # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ... 3957 3958 # The below changes handle 3959 # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ... 3960 3961 # Oracle allows both formats 3962 # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html) 3963 # and Snowflake chose to do the same for familiarity 3964 # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes 3965 this = self._parse_respect_or_ignore_nulls(this) 3966 3967 # bigquery select from window x AS (partition by ...) 
        if alias:
            # Named WINDOW definition: no OVER keyword, just `name AS (...)`.
            over = None
            self._match(TokenType.ALIAS)
        elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS):
            return this
        else:
            over = self._prev.text.upper()

        if not self._match(TokenType.L_PAREN):
            # OVER references a previously declared named window.
            return self.expression(
                exp.Window, this=this, alias=self._parse_id_var(False), over=over
            )

        window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS)

        first = self._match(TokenType.FIRST)
        if self._match_text_seq("LAST"):
            first = False

        partition = self._parse_partition_by()
        order = self._parse_order()
        kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text

        if kind:
            self._match(TokenType.BETWEEN)
            start = self._parse_window_spec()
            self._match(TokenType.AND)
            end = self._parse_window_spec()

            spec = self.expression(
                exp.WindowSpec,
                kind=kind,
                start=start["value"],
                start_side=start["side"],
                end=end["value"],
                end_side=end["side"],
            )
        else:
            spec = None

        self._match_r_paren()

        return self.expression(
            exp.Window,
            this=this,
            partition_by=partition,
            order=order,
            spec=spec,
            alias=window_alias,
            over=over,
            first=first,
        )

    def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]:
        """Parse one frame bound (UNBOUNDED / CURRENT ROW / expr) plus its
        PRECEDING/FOLLOWING side, as a {"value", "side"} dict."""
        self._match(TokenType.BETWEEN)

        return {
            "value": (
                (self._match_text_seq("UNBOUNDED") and "UNBOUNDED")
                or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW")
                or self._parse_bitwise()
            ),
            "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text,
        }

    def _parse_alias(
        self, this: t.Optional[exp.Expression], explicit: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse an [AS] alias (or parenthesized alias list) for `this`;
        `explicit` means the AS keyword is required."""
        any_token = self._match(TokenType.ALIAS)

        if explicit and not any_token:
            return this

        if self._match(TokenType.L_PAREN):
            aliases = self.expression(
                exp.Aliases,
                this=this,
                expressions=self._parse_csv(lambda: self._parse_id_var(any_token)),
            )
            self._match_r_paren(aliases)
            return aliases

        alias = self._parse_id_var(any_token)

        if alias:
            return self.expression(exp.Alias, this=this, alias=alias)

        return this

    def _parse_id_var(
        self,
        any_token: bool = True,
        tokens: t.Optional[t.Collection[TokenType]] = None,
    ) -> t.Optional[exp.Expression]:
        """Parse an identifier-like token into an Identifier, accepting `tokens`
        (default ID_VAR_TOKENS) or, with `any_token`, any non-reserved token."""
        identifier = self._parse_identifier()

        if identifier:
            return identifier

        if (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS):
            quoted = self._prev.token_type == TokenType.STRING
            return exp.Identifier(this=self._prev.text, quoted=quoted)

        return None

    def _parse_string(self) -> t.Optional[exp.Expression]:
        """Parse a string literal, or fall back to a placeholder."""
        if self._match(TokenType.STRING):
            return self.PRIMARY_PARSERS[TokenType.STRING](self, self._prev)
        return self._parse_placeholder()

    def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]:
        """Parse a string literal as a quoted identifier."""
        return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True)

    def _parse_number(self) -> t.Optional[exp.Expression]:
        """Parse a numeric literal, or fall back to a placeholder."""
        if self._match(TokenType.NUMBER):
            return self.PRIMARY_PARSERS[TokenType.NUMBER](self, self._prev)
        return self._parse_placeholder()

    def _parse_identifier(self) -> t.Optional[exp.Expression]:
        """Parse a quoted identifier token, or fall back to a placeholder."""
        if self._match(TokenType.IDENTIFIER):
            return self.expression(exp.Identifier, this=self._prev.text, quoted=True)
        return self._parse_placeholder()

    def _parse_var(
        self, any_token: bool = False, tokens: t.Optional[t.Collection[TokenType]] = None
    ) -> t.Optional[exp.Expression]:
        """Parse a VAR token (or any of `tokens`, or — with `any_token` — any
        non-reserved token) into a Var node."""
        if (
            (any_token and self._advance_any())
            or self._match(TokenType.VAR)
            or (self._match_set(tokens) if tokens else False)
        ):
            return self.expression(exp.Var, this=self._prev.text)
        return self._parse_placeholder()

    def _advance_any(self) -> t.Optional[Token]:
        """Consume and return the current token unless it is a reserved keyword."""
        if self._curr and self._curr.token_type not in self.RESERVED_KEYWORDS:
            self._advance()
            return self._prev
        return None

    def _parse_var_or_string(self) -> t.Optional[exp.Expression]:
        """Parse either a Var or a string literal."""
        return self._parse_var() or self._parse_string()

    def _parse_null(self) -> t.Optional[exp.Expression]:
        """Parse a NULL literal."""
        if self._match(TokenType.NULL):
            return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev)
        return None

    def _parse_boolean(self) -> t.Optional[exp.Expression]:
        """Parse a TRUE or FALSE literal."""
        if self._match(TokenType.TRUE):
            return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev)
        if self._match(TokenType.FALSE):
            return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev)
        return None

    def _parse_star(self) -> t.Optional[exp.Expression]:
        """Parse a `*` token."""
        if self._match(TokenType.STAR):
            return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev)
        return None

    def _parse_parameter(self) -> exp.Parameter:
        """Parse a parameter reference, optionally brace-wrapped."""
        wrapped = self._match(TokenType.L_BRACE)
        this = self._parse_var() or self._parse_identifier() or self._parse_primary()
        self._match(TokenType.R_BRACE)
        return self.expression(exp.Parameter, this=this, wrapped=wrapped)

    def _parse_placeholder(self) -> t.Optional[exp.Expression]:
        """Parse a placeholder token via PLACEHOLDER_PARSERS, backtracking if the
        dedicated parser produces nothing."""
        if self._match_set(self.PLACEHOLDER_PARSERS):
            placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self)
            if placeholder:
                return placeholder
            self._advance(-1)
        return None

    def _parse_except(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]:
        """Parse a SELECT * EXCEPT [(...)] column list."""
        if not self._match(TokenType.EXCEPT):
            return None
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_column)
        return self._parse_csv(self._parse_column)

    def _parse_replace(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]:
        """Parse a SELECT * REPLACE [(...)] expression list."""
        if not self._match(TokenType.REPLACE):
            return None
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_expression)
        return self._parse_csv(self._parse_expression)

    def _parse_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA
    ) -> t.List[t.Optional[exp.Expression]]:
        """Parse a `sep`-separated list using `parse_method`, skipping None results."""
        parse_result = parse_method()
        items = [parse_result] if parse_result is not None else []

        while self._match(sep):
            self._add_comments(parse_result)
            parse_result = parse_method()
            if parse_result is not None:
                items.append(parse_result)

        return items

    def _parse_tokens(
        self, parse_method: t.Callable, expressions: t.Dict
    ) -> t.Optional[exp.Expression]:
        """Left-fold a chain of binary operators: token types in `expressions` map to
        the AST node constructed around each pair of operands."""
        this = parse_method()

        while self._match_set(expressions):
            this = self.expression(
                expressions[self._prev.token_type],
                this=this,
                comments=self._prev_comments,
                expression=parse_method(),
            )

        return this

    def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[t.Optional[exp.Expression]]:
        """Parse a parenthesized, comma-separated identifier list."""
        return self._parse_wrapped_csv(self._parse_id_var, optional=optional)

    def _parse_wrapped_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False
    ) -> t.List[t.Optional[exp.Expression]]:
        """Parse a parenthesized `sep`-separated list; parens required unless `optional`."""
        return self._parse_wrapped(
            lambda: self._parse_csv(parse_method, sep=sep), optional=optional
        )

    def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any:
        """Run `parse_method` inside parentheses; they are required unless `optional`."""
        wrapped = self._match(TokenType.L_PAREN)
        if not wrapped and not optional:
            self.raise_error("Expecting (")
        parse_result = parse_method()
        if wrapped:
            self._match_r_paren()
        return parse_result

    def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]:
        """Parse either a SELECT or a plain (optionally aliased) expression, plus any
        trailing set operations."""
        return self._parse_select() or self._parse_set_operations(
            self._parse_expression() if alias else self._parse_conjunction()
        )

    def _parse_ddl_select(self) -> t.Optional[exp.Expression]:
        """Parse the SELECT part of a DDL statement (e.g. CREATE TABLE ... AS SELECT)."""
        return self._parse_query_modifiers(
            self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False))
        )

    def _parse_transaction(self) -> exp.Transaction:
        """Parse BEGIN/START [kind] [TRANSACTION | WORK] [mode [, mode]...]."""
        this = None
        if self._match_texts(self.TRANSACTION_KIND):
            this = self._prev.text

        self._match_texts({"TRANSACTION", "WORK"})

        modes = []
        while True:
            mode = []
            # A mode is a run of VAR tokens, e.g. "READ ONLY".
            while self._match(TokenType.VAR):
                mode.append(self._prev.text)

            if mode:
                modes.append(" ".join(mode))
            if not self._match(TokenType.COMMA):
                break

        return self.expression(exp.Transaction, this=this, modes=modes)

    def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback:
        """Parse COMMIT/ROLLBACK [TRANSACTION | WORK] [TO [SAVEPOINT] name] [AND [NO] CHAIN]."""
        chain = None
        savepoint = None
        is_rollback = self._prev.token_type == TokenType.ROLLBACK

        self._match_texts({"TRANSACTION", "WORK"})

        if self._match_text_seq("TO"):
            self._match_text_seq("SAVEPOINT")
            savepoint = self._parse_id_var()

        if self._match(TokenType.AND):
            chain = not self._match_text_seq("NO")
            self._match_text_seq("CHAIN")

        if is_rollback:
            return self.expression(exp.Rollback, savepoint=savepoint)

        return self.expression(exp.Commit, chain=chain)

    def _parse_add_column(self) -> t.Optional[exp.Expression]:
        """Parse ALTER TABLE ... ADD [COLUMN] [IF NOT EXISTS] <column def> [FIRST | AFTER col]."""
        if not self._match_text_seq("ADD"):
            return None

        self._match(TokenType.COLUMN)
        exists_column = self._parse_exists(not_=True)
        expression = self._parse_column_def(self._parse_field(any_token=True))

        if expression:
            expression.set("exists", exists_column)

            # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns
            if self._match_texts(("FIRST", "AFTER")):
                position = self._prev.text
                column_position = self.expression(
                    exp.ColumnPosition, this=self._parse_column(), position=position
                )
                expression.set("position", column_position)

        return expression

    def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]:
        """Parse ALTER TABLE ... DROP [COLUMN] via _parse_drop, defaulting kind to COLUMN."""
        drop = self._match(TokenType.DROP) and self._parse_drop()
        if drop and not isinstance(drop, exp.Command):
            drop.set("kind", drop.args.get("kind", "COLUMN"))
        return drop

    # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html
    def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition:
        """Parse ALTER TABLE ... DROP [IF EXISTS] PARTITION ... (possibly several)."""
        return self.expression(
            exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists
        )

    def _parse_add_constraint(self) -> exp.AddConstraint:
        """Parse ALTER TABLE ... ADD [CONSTRAINT name] {CHECK | FOREIGN KEY | PRIMARY KEY} ..."""
        this = None
        kind = self._prev.token_type

        if kind == TokenType.CONSTRAINT:
            this = self._parse_id_var()

            if self._match_text_seq("CHECK"):
                expression = self._parse_wrapped(self._parse_conjunction)
                enforced = self._match_text_seq("ENFORCED")

                return self.expression(
                    exp.AddConstraint, this=this, expression=expression, enforced=enforced
                )

        if kind == TokenType.FOREIGN_KEY or self._match(TokenType.FOREIGN_KEY):
            expression = self._parse_foreign_key()
        elif kind == TokenType.PRIMARY_KEY or self._match(TokenType.PRIMARY_KEY):
            expression = self._parse_primary_key()
        else:
            expression = None

        return self.expression(exp.AddConstraint, this=this, expression=expression)

    def _parse_alter_table_add(self) -> t.List[t.Optional[exp.Expression]]:
        """Parse the ADD action list of an ALTER TABLE: constraints if the next token
        allows it, otherwise columns (with backtracking)."""
        index = self._index - 1

        if self._match_set(self.ADD_CONSTRAINT_TOKENS):
            return self._parse_csv(self._parse_add_constraint)

        self._retreat(index)
        return self._parse_csv(self._parse_add_column)

    def _parse_alter_table_alter(self) -> exp.AlterColumn:
        """Parse ALTER TABLE ... ALTER [COLUMN] col
        {DROP DEFAULT | SET DEFAULT expr | [SET DATA] TYPE ... [COLLATE ...] [USING ...]}."""
self._match(TokenType.COLUMN) 4322 column = self._parse_field(any_token=True) 4323 4324 if self._match_pair(TokenType.DROP, TokenType.DEFAULT): 4325 return self.expression(exp.AlterColumn, this=column, drop=True) 4326 if self._match_pair(TokenType.SET, TokenType.DEFAULT): 4327 return self.expression(exp.AlterColumn, this=column, default=self._parse_conjunction()) 4328 4329 self._match_text_seq("SET", "DATA") 4330 return self.expression( 4331 exp.AlterColumn, 4332 this=column, 4333 dtype=self._match_text_seq("TYPE") and self._parse_types(), 4334 collate=self._match(TokenType.COLLATE) and self._parse_term(), 4335 using=self._match(TokenType.USING) and self._parse_conjunction(), 4336 ) 4337 4338 def _parse_alter_table_drop(self) -> t.List[t.Optional[exp.Expression]]: 4339 index = self._index - 1 4340 4341 partition_exists = self._parse_exists() 4342 if self._match(TokenType.PARTITION, advance=False): 4343 return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists)) 4344 4345 self._retreat(index) 4346 return self._parse_csv(self._parse_drop_column) 4347 4348 def _parse_alter_table_rename(self) -> exp.RenameTable: 4349 self._match_text_seq("TO") 4350 return self.expression(exp.RenameTable, this=self._parse_table(schema=True)) 4351 4352 def _parse_alter(self) -> exp.AlterTable | exp.Command: 4353 start = self._prev 4354 4355 if not self._match(TokenType.TABLE): 4356 return self._parse_as_command(start) 4357 4358 exists = self._parse_exists() 4359 this = self._parse_table(schema=True) 4360 4361 if self._next: 4362 self._advance() 4363 parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None 4364 4365 if parser: 4366 actions = ensure_list(parser(self)) 4367 4368 if not self._curr: 4369 return self.expression( 4370 exp.AlterTable, 4371 this=this, 4372 exists=exists, 4373 actions=actions, 4374 ) 4375 return self._parse_as_command(start) 4376 4377 def _parse_merge(self) -> exp.Merge: 4378 self._match(TokenType.INTO) 4379 target = 
self._parse_table() 4380 4381 self._match(TokenType.USING) 4382 using = self._parse_table() 4383 4384 self._match(TokenType.ON) 4385 on = self._parse_conjunction() 4386 4387 whens = [] 4388 while self._match(TokenType.WHEN): 4389 matched = not self._match(TokenType.NOT) 4390 self._match_text_seq("MATCHED") 4391 source = ( 4392 False 4393 if self._match_text_seq("BY", "TARGET") 4394 else self._match_text_seq("BY", "SOURCE") 4395 ) 4396 condition = self._parse_conjunction() if self._match(TokenType.AND) else None 4397 4398 self._match(TokenType.THEN) 4399 4400 if self._match(TokenType.INSERT): 4401 _this = self._parse_star() 4402 if _this: 4403 then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=_this) 4404 else: 4405 then = self.expression( 4406 exp.Insert, 4407 this=self._parse_value(), 4408 expression=self._match(TokenType.VALUES) and self._parse_value(), 4409 ) 4410 elif self._match(TokenType.UPDATE): 4411 expressions = self._parse_star() 4412 if expressions: 4413 then = self.expression(exp.Update, expressions=expressions) 4414 else: 4415 then = self.expression( 4416 exp.Update, 4417 expressions=self._match(TokenType.SET) 4418 and self._parse_csv(self._parse_equality), 4419 ) 4420 elif self._match(TokenType.DELETE): 4421 then = self.expression(exp.Var, this=self._prev.text) 4422 else: 4423 then = None 4424 4425 whens.append( 4426 self.expression( 4427 exp.When, 4428 matched=matched, 4429 source=source, 4430 condition=condition, 4431 then=then, 4432 ) 4433 ) 4434 4435 return self.expression( 4436 exp.Merge, 4437 this=target, 4438 using=using, 4439 on=on, 4440 expressions=whens, 4441 ) 4442 4443 def _parse_show(self) -> t.Optional[exp.Expression]: 4444 parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE) 4445 if parser: 4446 return parser(self) 4447 self._advance() 4448 return self.expression(exp.Show, this=self._prev.text.upper()) 4449 4450 def _parse_set_item_assignment( 4451 self, kind: t.Optional[str] = None 4452 ) -> 
t.Optional[exp.Expression]: 4453 index = self._index 4454 4455 if kind in {"GLOBAL", "SESSION"} and self._match_text_seq("TRANSACTION"): 4456 return self._parse_set_transaction(global_=kind == "GLOBAL") 4457 4458 left = self._parse_primary() or self._parse_id_var() 4459 4460 if not self._match_texts(("=", "TO")): 4461 self._retreat(index) 4462 return None 4463 4464 right = self._parse_statement() or self._parse_id_var() 4465 this = self.expression(exp.EQ, this=left, expression=right) 4466 4467 return self.expression(exp.SetItem, this=this, kind=kind) 4468 4469 def _parse_set_transaction(self, global_: bool = False) -> exp.Expression: 4470 self._match_text_seq("TRANSACTION") 4471 characteristics = self._parse_csv( 4472 lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS) 4473 ) 4474 return self.expression( 4475 exp.SetItem, 4476 expressions=characteristics, 4477 kind="TRANSACTION", 4478 **{"global": global_}, # type: ignore 4479 ) 4480 4481 def _parse_set_item(self) -> t.Optional[exp.Expression]: 4482 parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE) 4483 return parser(self) if parser else self._parse_set_item_assignment(kind=None) 4484 4485 def _parse_set(self) -> exp.Set | exp.Command: 4486 index = self._index 4487 set_ = self.expression(exp.Set, expressions=self._parse_csv(self._parse_set_item)) 4488 4489 if self._curr: 4490 self._retreat(index) 4491 return self._parse_as_command(self._prev) 4492 4493 return set_ 4494 4495 def _parse_var_from_options(self, options: t.Collection[str]) -> t.Optional[exp.Var]: 4496 for option in options: 4497 if self._match_text_seq(*option.split(" ")): 4498 return exp.var(option) 4499 return None 4500 4501 def _parse_as_command(self, start: Token) -> exp.Command: 4502 while self._curr: 4503 self._advance() 4504 text = self._find_sql(start, self._prev) 4505 size = len(start.text) 4506 return exp.Command(this=text[:size], expression=text[size:]) 4507 4508 def _parse_dict_property(self, this: str) -> 
exp.DictProperty: 4509 settings = [] 4510 4511 self._match_l_paren() 4512 kind = self._parse_id_var() 4513 4514 if self._match(TokenType.L_PAREN): 4515 while True: 4516 key = self._parse_id_var() 4517 value = self._parse_primary() 4518 4519 if not key and value is None: 4520 break 4521 settings.append(self.expression(exp.DictSubProperty, this=key, value=value)) 4522 self._match(TokenType.R_PAREN) 4523 4524 self._match_r_paren() 4525 4526 return self.expression( 4527 exp.DictProperty, 4528 this=this, 4529 kind=kind.this if kind else None, 4530 settings=settings, 4531 ) 4532 4533 def _parse_dict_range(self, this: str) -> exp.DictRange: 4534 self._match_l_paren() 4535 has_min = self._match_text_seq("MIN") 4536 if has_min: 4537 min = self._parse_var() or self._parse_primary() 4538 self._match_text_seq("MAX") 4539 max = self._parse_var() or self._parse_primary() 4540 else: 4541 max = self._parse_var() or self._parse_primary() 4542 min = exp.Literal.number(0) 4543 self._match_r_paren() 4544 return self.expression(exp.DictRange, this=this, min=min, max=max) 4545 4546 def _find_parser( 4547 self, parsers: t.Dict[str, t.Callable], trie: t.Dict 4548 ) -> t.Optional[t.Callable]: 4549 if not self._curr: 4550 return None 4551 4552 index = self._index 4553 this = [] 4554 while True: 4555 # The current token might be multiple words 4556 curr = self._curr.text.upper() 4557 key = curr.split(" ") 4558 this.append(curr) 4559 self._advance() 4560 result, trie = in_trie(trie, key) 4561 if result == 0: 4562 break 4563 if result == 2: 4564 subparser = parsers[" ".join(this)] 4565 return subparser 4566 self._retreat(index) 4567 return None 4568 4569 def _match(self, token_type, advance=True, expression=None): 4570 if not self._curr: 4571 return None 4572 4573 if self._curr.token_type == token_type: 4574 if advance: 4575 self._advance() 4576 self._add_comments(expression) 4577 return True 4578 4579 return None 4580 4581 def _match_set(self, types, advance=True): 4582 if not self._curr: 
4583 return None 4584 4585 if self._curr.token_type in types: 4586 if advance: 4587 self._advance() 4588 return True 4589 4590 return None 4591 4592 def _match_pair(self, token_type_a, token_type_b, advance=True): 4593 if not self._curr or not self._next: 4594 return None 4595 4596 if self._curr.token_type == token_type_a and self._next.token_type == token_type_b: 4597 if advance: 4598 self._advance(2) 4599 return True 4600 4601 return None 4602 4603 def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 4604 if not self._match(TokenType.L_PAREN, expression=expression): 4605 self.raise_error("Expecting (") 4606 4607 def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 4608 if not self._match(TokenType.R_PAREN, expression=expression): 4609 self.raise_error("Expecting )") 4610 4611 def _match_texts(self, texts, advance=True): 4612 if self._curr and self._curr.text.upper() in texts: 4613 if advance: 4614 self._advance() 4615 return True 4616 return False 4617 4618 def _match_text_seq(self, *texts, advance=True): 4619 index = self._index 4620 for text in texts: 4621 if self._curr and self._curr.text.upper() == text: 4622 self._advance() 4623 else: 4624 self._retreat(index) 4625 return False 4626 4627 if not advance: 4628 self._retreat(index) 4629 4630 return True 4631 4632 @t.overload 4633 def _replace_columns_with_dots(self, this: exp.Expression) -> exp.Expression: 4634 ... 4635 4636 @t.overload 4637 def _replace_columns_with_dots( 4638 self, this: t.Optional[exp.Expression] 4639 ) -> t.Optional[exp.Expression]: 4640 ... 
4641 4642 def _replace_columns_with_dots(self, this): 4643 if isinstance(this, exp.Dot): 4644 exp.replace_children(this, self._replace_columns_with_dots) 4645 elif isinstance(this, exp.Column): 4646 exp.replace_children(this, self._replace_columns_with_dots) 4647 table = this.args.get("table") 4648 this = ( 4649 self.expression(exp.Dot, this=table, expression=this.this) 4650 if table 4651 else self.expression(exp.Var, this=this.name) 4652 ) 4653 elif isinstance(this, exp.Identifier): 4654 this = self.expression(exp.Var, this=this.name) 4655 4656 return this 4657 4658 def _replace_lambda( 4659 self, node: t.Optional[exp.Expression], lambda_variables: t.Set[str] 4660 ) -> t.Optional[exp.Expression]: 4661 if not node: 4662 return node 4663 4664 for column in node.find_all(exp.Column): 4665 if column.parts[0].name in lambda_variables: 4666 dot_or_id = column.to_dot() if column.table else column.this 4667 parent = column.parent 4668 4669 while isinstance(parent, exp.Dot): 4670 if not isinstance(parent.parent, exp.Dot): 4671 parent.replace(dot_or_id) 4672 break 4673 parent = parent.parent 4674 else: 4675 if column is node: 4676 node = dot_or_id 4677 else: 4678 column.replace(dot_or_id) 4679 return node
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: Determines the amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
824 def __init__( 825 self, 826 error_level: t.Optional[ErrorLevel] = None, 827 error_message_context: int = 100, 828 max_errors: int = 3, 829 ): 830 self.error_level = error_level or ErrorLevel.IMMEDIATE 831 self.error_message_context = error_message_context 832 self.max_errors = max_errors 833 self.reset()
845 def parse( 846 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 847 ) -> t.List[t.Optional[exp.Expression]]: 848 """ 849 Parses a list of tokens and returns a list of syntax trees, one tree 850 per parsed SQL statement. 851 852 Args: 853 raw_tokens: The list of tokens. 854 sql: The original SQL string, used to produce helpful debug messages. 855 856 Returns: 857 The list of the produced syntax trees. 858 """ 859 return self._parse( 860 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 861 )
Parses a list of tokens and returns a list of syntax trees, one tree per parsed SQL statement.
Arguments:
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The list of the produced syntax trees.
863 def parse_into( 864 self, 865 expression_types: exp.IntoType, 866 raw_tokens: t.List[Token], 867 sql: t.Optional[str] = None, 868 ) -> t.List[t.Optional[exp.Expression]]: 869 """ 870 Parses a list of tokens into a given Expression type. If a collection of Expression 871 types is given instead, this method will try to parse the token list into each one 872 of them, stopping at the first for which the parsing succeeds. 873 874 Args: 875 expression_types: The expression type(s) to try and parse the token list into. 876 raw_tokens: The list of tokens. 877 sql: The original SQL string, used to produce helpful debug messages. 878 879 Returns: 880 The target Expression. 881 """ 882 errors = [] 883 for expression_type in ensure_list(expression_types): 884 parser = self.EXPRESSION_PARSERS.get(expression_type) 885 if not parser: 886 raise TypeError(f"No parser registered for {expression_type}") 887 888 try: 889 return self._parse(parser, raw_tokens, sql) 890 except ParseError as e: 891 e.errors[0]["into_expression"] = expression_type 892 errors.append(e) 893 894 raise ParseError( 895 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 896 errors=merge_errors(errors), 897 ) from errors[-1]
Parses a list of tokens into a given Expression type. If a collection of Expression types is given instead, this method will try to parse the token list into each one of them, stopping at the first for which the parsing succeeds.
Arguments:
- expression_types: The expression type(s) to try and parse the token list into.
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The target Expression.
934 def check_errors(self) -> None: 935 """Logs or raises any found errors, depending on the chosen error level setting.""" 936 if self.error_level == ErrorLevel.WARN: 937 for error in self.errors: 938 logger.error(str(error)) 939 elif self.error_level == ErrorLevel.RAISE and self.errors: 940 raise ParseError( 941 concat_messages(self.errors, self.max_errors), 942 errors=merge_errors(self.errors), 943 )
Logs or raises any found errors, depending on the chosen error level setting.
945 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 946 """ 947 Appends an error in the list of recorded errors or raises it, depending on the chosen 948 error level setting. 949 """ 950 token = token or self._curr or self._prev or Token.string("") 951 start = token.start 952 end = token.end + 1 953 start_context = self.sql[max(start - self.error_message_context, 0) : start] 954 highlight = self.sql[start:end] 955 end_context = self.sql[end : end + self.error_message_context] 956 957 error = ParseError.new( 958 f"{message}. Line {token.line}, Col: {token.col}.\n" 959 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 960 description=message, 961 line=token.line, 962 col=token.col, 963 start_context=start_context, 964 highlight=highlight, 965 end_context=end_context, 966 ) 967 968 if self.error_level == ErrorLevel.IMMEDIATE: 969 raise error 970 971 self.errors.append(error)
Appends an error in the list of recorded errors or raises it, depending on the chosen error level setting.
973 def expression( 974 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 975 ) -> E: 976 """ 977 Creates a new, validated Expression. 978 979 Args: 980 exp_class: The expression class to instantiate. 981 comments: An optional list of comments to attach to the expression. 982 kwargs: The arguments to set for the expression along with their respective values. 983 984 Returns: 985 The target expression. 986 """ 987 instance = exp_class(**kwargs) 988 instance.add_comments(comments) if comments else self._add_comments(instance) 989 return self.validate_expression(instance)
Creates a new, validated Expression.
Arguments:
- exp_class: The expression class to instantiate.
- comments: An optional list of comments to attach to the expression.
- kwargs: The arguments to set for the expression along with their respective values.
Returns:
The target expression.
996 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 997 """ 998 Validates an Expression, making sure that all its mandatory arguments are set. 999 1000 Args: 1001 expression: The expression to validate. 1002 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1003 1004 Returns: 1005 The validated expression. 1006 """ 1007 if self.error_level != ErrorLevel.IGNORE: 1008 for error_message in expression.error_messages(args): 1009 self.raise_error(error_message) 1010 1011 return expression
Validates an Expression, making sure that all its mandatory arguments are set.
Arguments:
- expression: The expression to validate.
- args: An optional list of items that was used to instantiate the expression, if it's a Func.
Returns:
The validated expression.