sqlglot.parser
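`sqlglot.parser` exposes the base `Parser` that every dialect subclasses. As a quick orientation before the source below, this is the flow the top-level `sqlglot.parse`/`parse_one` helpers wrap (a minimal sketch using only the public tokenizer and parser):

```python
from sqlglot.parser import Parser
from sqlglot.tokens import Tokenizer

sql = "SELECT a FROM t"
# parse() returns one syntax tree per semicolon-separated statement.
expressions = Parser().parse(Tokenizer().tokenize(sql), sql)
print(expressions[0].sql())  # SELECT a FROM t
```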
from __future__ import annotations

import logging
import typing as t
from collections import defaultdict

from sqlglot import exp
from sqlglot.errors import ErrorLevel, ParseError, concat_messages, merge_errors
from sqlglot.helper import apply_index_offset, ensure_list, seq_get
from sqlglot.time import format_time
from sqlglot.tokens import Token, Tokenizer, TokenType
from sqlglot.trie import in_trie, new_trie

if t.TYPE_CHECKING:
    from sqlglot._typing import E

logger = logging.getLogger("sqlglot")


def parse_var_map(args: t.List) -> exp.StarMap | exp.VarMap:
    if len(args) == 1 and args[0].is_star:
        return exp.StarMap(this=args[0])

    keys = []
    values = []
    for i in range(0, len(args), 2):
        keys.append(args[i])
        values.append(args[i + 1])

    return exp.VarMap(
        keys=exp.Array(expressions=keys),
        values=exp.Array(expressions=values),
    )


def parse_like(args: t.List) -> exp.Escape | exp.Like:
    like = exp.Like(this=seq_get(args, 1), expression=seq_get(args, 0))
    return exp.Escape(this=like, expression=seq_get(args, 2)) if len(args) > 2 else like


def binary_range_parser(
    expr_type: t.Type[exp.Expression],
) -> t.Callable[[Parser, t.Optional[exp.Expression]], t.Optional[exp.Expression]]:
    return lambda self, this: self._parse_escape(
        self.expression(expr_type, this=this, expression=self._parse_bitwise())
    )
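These module-level helpers are plain builders: each receives a list of already-parsed argument expressions and returns a new node (note that `parse_like` maps the argument list so the *second* argument becomes the `this` of the resulting `exp.Like`). A small editorial sketch of calling `parse_var_map` directly, not part of the module itself:

```python
from sqlglot import exp
from sqlglot.parser import parse_var_map

# VAR_MAP('a', 1, 'b', 2): keys and values alternate in the argument list.
node = parse_var_map(
    [
        exp.Literal.string("a"),
        exp.Literal.number(1),
        exp.Literal.string("b"),
        exp.Literal.number(2),
    ]
)
assert isinstance(node, exp.VarMap)
print(node.args["keys"].expressions)  # the two key Literal nodes
```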
class _Parser(type):
    def __new__(cls, clsname, bases, attrs):
        klass = super().__new__(cls, clsname, bases, attrs)

        klass.SHOW_TRIE = new_trie(key.split(" ") for key in klass.SHOW_PARSERS)
        klass.SET_TRIE = new_trie(key.split(" ") for key in klass.SET_PARSERS)

        return klass


class Parser(metaclass=_Parser):
    """
    Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.

    Args:
        error_level: The desired error level.
            Default: ErrorLevel.IMMEDIATE
        error_message_context: Determines the amount of context to capture from a
            query string when displaying the error message (in number of characters).
            Default: 100
        max_errors: Maximum number of error messages to include in a raised ParseError.
            This is only relevant if error_level is ErrorLevel.RAISE.
            Default: 3
    """

    FUNCTIONS: t.Dict[str, t.Callable] = {
        **{name: f.from_arg_list for f in exp.ALL_FUNCTIONS for name in f.sql_names()},
        "DATE_TO_DATE_STR": lambda args: exp.Cast(
            this=seq_get(args, 0),
            to=exp.DataType(this=exp.DataType.Type.TEXT),
        ),
        "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)),
        "LIKE": parse_like,
        "TIME_TO_TIME_STR": lambda args: exp.Cast(
            this=seq_get(args, 0),
            to=exp.DataType(this=exp.DataType.Type.TEXT),
        ),
        "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring(
            this=exp.Cast(
                this=seq_get(args, 0),
                to=exp.DataType(this=exp.DataType.Type.TEXT),
            ),
            start=exp.Literal.number(1),
            length=exp.Literal.number(10),
        ),
        "VAR_MAP": parse_var_map,
    }
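`FUNCTIONS` seeds the parser's name-to-builder registry: every expression in `exp.ALL_FUNCTIONS` contributes its `sql_names()`, and the explicit entries above add or override builders. A sketch of looking a builder up and applying it by hand:

```python
from sqlglot import exp
from sqlglot.parser import Parser

# Fetch the builder registered above and apply it to an argument list.
build = Parser.FUNCTIONS["DATE_TO_DATE_STR"]
node = build([exp.column("d")])
print(node.sql())  # CAST(d AS TEXT)
```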
    NO_PAREN_FUNCTIONS = {
        TokenType.CURRENT_DATE: exp.CurrentDate,
        TokenType.CURRENT_DATETIME: exp.CurrentDate,
        TokenType.CURRENT_TIME: exp.CurrentTime,
        TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp,
        TokenType.CURRENT_USER: exp.CurrentUser,
    }

    NESTED_TYPE_TOKENS = {
        TokenType.ARRAY,
        TokenType.MAP,
        TokenType.NULLABLE,
        TokenType.STRUCT,
    }

    ENUM_TYPE_TOKENS = {
        TokenType.ENUM,
    }

    TYPE_TOKENS = {
        TokenType.BIT,
        TokenType.BOOLEAN,
        TokenType.TINYINT,
        TokenType.UTINYINT,
        TokenType.SMALLINT,
        TokenType.USMALLINT,
        TokenType.INT,
        TokenType.UINT,
        TokenType.BIGINT,
        TokenType.UBIGINT,
        TokenType.INT128,
        TokenType.UINT128,
        TokenType.INT256,
        TokenType.UINT256,
        TokenType.FLOAT,
        TokenType.DOUBLE,
        TokenType.CHAR,
        TokenType.NCHAR,
        TokenType.VARCHAR,
        TokenType.NVARCHAR,
        TokenType.TEXT,
        TokenType.MEDIUMTEXT,
        TokenType.LONGTEXT,
        TokenType.MEDIUMBLOB,
        TokenType.LONGBLOB,
        TokenType.BINARY,
        TokenType.VARBINARY,
        TokenType.JSON,
        TokenType.JSONB,
        TokenType.INTERVAL,
        TokenType.TIME,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
        TokenType.DATETIME,
        TokenType.DATETIME64,
        TokenType.DATE,
        TokenType.INT4RANGE,
        TokenType.INT4MULTIRANGE,
        TokenType.INT8RANGE,
        TokenType.INT8MULTIRANGE,
        TokenType.NUMRANGE,
        TokenType.NUMMULTIRANGE,
        TokenType.TSRANGE,
        TokenType.TSMULTIRANGE,
        TokenType.TSTZRANGE,
        TokenType.TSTZMULTIRANGE,
        TokenType.DATERANGE,
        TokenType.DATEMULTIRANGE,
        TokenType.DECIMAL,
        TokenType.BIGDECIMAL,
        TokenType.UUID,
        TokenType.GEOGRAPHY,
        TokenType.GEOMETRY,
        TokenType.HLLSKETCH,
        TokenType.HSTORE,
        TokenType.PSEUDO_TYPE,
        TokenType.SUPER,
        TokenType.SERIAL,
        TokenType.SMALLSERIAL,
        TokenType.BIGSERIAL,
        TokenType.XML,
        TokenType.UNIQUEIDENTIFIER,
        TokenType.MONEY,
        TokenType.SMALLMONEY,
        TokenType.ROWVERSION,
        TokenType.IMAGE,
        TokenType.VARIANT,
        TokenType.OBJECT,
        TokenType.INET,
        TokenType.ENUM,
        *NESTED_TYPE_TOKENS,
    }

    SUBQUERY_PREDICATES = {
        TokenType.ANY: exp.Any,
        TokenType.ALL: exp.All,
        TokenType.EXISTS: exp.Exists,
        TokenType.SOME: exp.Any,
    }

    RESERVED_KEYWORDS = {
        *Tokenizer.SINGLE_TOKENS.values(),
        TokenType.SELECT,
    }

    DB_CREATABLES = {
        TokenType.DATABASE,
        TokenType.SCHEMA,
        TokenType.TABLE,
        TokenType.VIEW,
        TokenType.DICTIONARY,
    }

    CREATABLES = {
        TokenType.COLUMN,
        TokenType.FUNCTION,
        TokenType.INDEX,
        TokenType.PROCEDURE,
        *DB_CREATABLES,
    }

    # Tokens that can represent identifiers
    ID_VAR_TOKENS = {
        TokenType.VAR,
        TokenType.ANTI,
        TokenType.APPLY,
        TokenType.ASC,
        TokenType.AUTO_INCREMENT,
        TokenType.BEGIN,
        TokenType.CACHE,
        TokenType.CASE,
        TokenType.COLLATE,
        TokenType.COMMAND,
        TokenType.COMMENT,
        TokenType.COMMIT,
        TokenType.CONSTRAINT,
        TokenType.DEFAULT,
        TokenType.DELETE,
        TokenType.DESC,
        TokenType.DESCRIBE,
        TokenType.DICTIONARY,
        TokenType.DIV,
        TokenType.END,
        TokenType.EXECUTE,
        TokenType.ESCAPE,
        TokenType.FALSE,
        TokenType.FIRST,
        TokenType.FILTER,
        TokenType.FORMAT,
        TokenType.FULL,
        TokenType.IF,
        TokenType.IS,
        TokenType.ISNULL,
        TokenType.INTERVAL,
        TokenType.KEEP,
        TokenType.LEFT,
        TokenType.LOAD,
        TokenType.MERGE,
        TokenType.NATURAL,
        TokenType.NEXT,
        TokenType.OFFSET,
        TokenType.ORDINALITY,
        TokenType.OVERWRITE,
        TokenType.PARTITION,
        TokenType.PERCENT,
        TokenType.PIVOT,
        TokenType.PRAGMA,
        TokenType.RANGE,
        TokenType.REFERENCES,
        TokenType.RIGHT,
        TokenType.ROW,
        TokenType.ROWS,
        TokenType.SEMI,
        TokenType.SET,
        TokenType.SETTINGS,
        TokenType.SHOW,
        TokenType.TEMPORARY,
        TokenType.TOP,
        TokenType.TRUE,
        TokenType.UNIQUE,
        TokenType.UNPIVOT,
        TokenType.UPDATE,
        TokenType.VOLATILE,
        TokenType.WINDOW,
        *CREATABLES,
        *SUBQUERY_PREDICATES,
        *TYPE_TOKENS,
        *NO_PAREN_FUNCTIONS,
    }

    INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END}

    TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - {
        TokenType.APPLY,
        TokenType.ASOF,
        TokenType.FULL,
        TokenType.LEFT,
        TokenType.LOCK,
        TokenType.NATURAL,
        TokenType.OFFSET,
        TokenType.RIGHT,
        TokenType.WINDOW,
    }

    COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS}

    UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET}

    TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"}

    FUNC_TOKENS = {
        TokenType.COMMAND,
        TokenType.CURRENT_DATE,
        TokenType.CURRENT_DATETIME,
        TokenType.CURRENT_TIMESTAMP,
        TokenType.CURRENT_TIME,
        TokenType.CURRENT_USER,
        TokenType.FILTER,
        TokenType.FIRST,
        TokenType.FORMAT,
        TokenType.GLOB,
        TokenType.IDENTIFIER,
        TokenType.INDEX,
        TokenType.ISNULL,
        TokenType.ILIKE,
        TokenType.LIKE,
        TokenType.MERGE,
        TokenType.OFFSET,
        TokenType.PRIMARY_KEY,
        TokenType.RANGE,
        TokenType.REPLACE,
        TokenType.ROW,
        TokenType.UNNEST,
        TokenType.VAR,
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.DATE,
        TokenType.DATETIME,
        TokenType.TABLE,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.WINDOW,
        *TYPE_TOKENS,
        *SUBQUERY_PREDICATES,
    }

    CONJUNCTION = {
        TokenType.AND: exp.And,
        TokenType.OR: exp.Or,
    }

    EQUALITY = {
        TokenType.EQ: exp.EQ,
        TokenType.NEQ: exp.NEQ,
        TokenType.NULLSAFE_EQ: exp.NullSafeEQ,
    }

    COMPARISON = {
        TokenType.GT: exp.GT,
        TokenType.GTE: exp.GTE,
        TokenType.LT: exp.LT,
        TokenType.LTE: exp.LTE,
    }

    BITWISE = {
        TokenType.AMP: exp.BitwiseAnd,
        TokenType.CARET: exp.BitwiseXor,
        TokenType.PIPE: exp.BitwiseOr,
        TokenType.DPIPE: exp.DPipe,
    }

    TERM = {
        TokenType.DASH: exp.Sub,
        TokenType.PLUS: exp.Add,
        TokenType.MOD: exp.Mod,
        TokenType.COLLATE: exp.Collate,
    }

    FACTOR = {
        TokenType.DIV: exp.IntDiv,
        TokenType.LR_ARROW: exp.Distance,
        TokenType.SLASH: exp.Div,
        TokenType.STAR: exp.Mul,
    }
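`CONJUNCTION` through `FACTOR` are the parser's precedence tables: each maps an operator token to the binary node it builds, and each precedence level of the parser consumes only the tokens of its own table. The mapped classes are ordinary `exp` constructors, so an illustrative sketch is:

```python
from sqlglot import exp
from sqlglot.parser import Parser
from sqlglot.tokens import TokenType

assert Parser.EQUALITY[TokenType.EQ] is exp.EQ

eq = exp.EQ(this=exp.column("a"), expression=exp.Literal.number(1))
print(eq.sql())  # a = 1
```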
    TIMESTAMPS = {
        TokenType.TIME,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
    }

    SET_OPERATIONS = {
        TokenType.UNION,
        TokenType.INTERSECT,
        TokenType.EXCEPT,
    }

    JOIN_METHODS = {
        TokenType.NATURAL,
        TokenType.ASOF,
    }

    JOIN_SIDES = {
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.FULL,
    }

    JOIN_KINDS = {
        TokenType.INNER,
        TokenType.OUTER,
        TokenType.CROSS,
        TokenType.SEMI,
        TokenType.ANTI,
    }

    JOIN_HINTS: t.Set[str] = set()

    LAMBDAS = {
        TokenType.ARROW: lambda self, expressions: self.expression(
            exp.Lambda,
            this=self._replace_lambda(
                self._parse_conjunction(),
                {node.name for node in expressions},
            ),
            expressions=expressions,
        ),
        TokenType.FARROW: lambda self, expressions: self.expression(
            exp.Kwarg,
            this=exp.var(expressions[0].name),
            expression=self._parse_conjunction(),
        ),
    }

    COLUMN_OPERATORS = {
        TokenType.DOT: None,
        TokenType.DCOLON: lambda self, this, to: self.expression(
            exp.Cast if self.STRICT_CAST else exp.TryCast,
            this=this,
            to=to,
        ),
        TokenType.ARROW: lambda self, this, path: self.expression(
            exp.JSONExtract,
            this=this,
            expression=path,
        ),
        TokenType.DARROW: lambda self, this, path: self.expression(
            exp.JSONExtractScalar,
            this=this,
            expression=path,
        ),
        TokenType.HASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtract,
            this=this,
            expression=path,
        ),
        TokenType.DHASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtractScalar,
            this=this,
            expression=path,
        ),
        TokenType.PLACEHOLDER: lambda self, this, key: self.expression(
            exp.JSONBContains,
            this=this,
            expression=key,
        ),
    }
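`COLUMN_OPERATORS` attaches postfix operators to a just-parsed column: `::` becomes a cast (`exp.Cast` or `exp.TryCast` depending on `STRICT_CAST`), and the arrow family becomes JSON/JSONB extraction. A sketch, assuming the default dialect's tokenizer (which recognizes `::` and `->`):

```python
import sqlglot

cast = sqlglot.parse_one("x::INT")
print(type(cast).__name__)  # Cast

extract = sqlglot.parse_one("col -> '$.key'")
print(type(extract).__name__)  # JSONExtract
```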
    EXPRESSION_PARSERS = {
        exp.Cluster: lambda self: self._parse_sort(exp.Cluster, "CLUSTER", "BY"),
        exp.Column: lambda self: self._parse_column(),
        exp.Condition: lambda self: self._parse_conjunction(),
        exp.DataType: lambda self: self._parse_types(),
        exp.Expression: lambda self: self._parse_statement(),
        exp.From: lambda self: self._parse_from(),
        exp.Group: lambda self: self._parse_group(),
        exp.Having: lambda self: self._parse_having(),
        exp.Identifier: lambda self: self._parse_id_var(),
        exp.Join: lambda self: self._parse_join(),
        exp.Lambda: lambda self: self._parse_lambda(),
        exp.Lateral: lambda self: self._parse_lateral(),
        exp.Limit: lambda self: self._parse_limit(),
        exp.Offset: lambda self: self._parse_offset(),
        exp.Order: lambda self: self._parse_order(),
        exp.Ordered: lambda self: self._parse_ordered(),
        exp.Properties: lambda self: self._parse_properties(),
        exp.Qualify: lambda self: self._parse_qualify(),
        exp.Returning: lambda self: self._parse_returning(),
        exp.Sort: lambda self: self._parse_sort(exp.Sort, "SORT", "BY"),
        exp.Table: lambda self: self._parse_table_parts(),
        exp.TableAlias: lambda self: self._parse_table_alias(),
        exp.Where: lambda self: self._parse_where(),
        exp.Window: lambda self: self._parse_named_window(),
        exp.With: lambda self: self._parse_with(),
        "JOIN_TYPE": lambda self: self._parse_join_parts(),
    }

    STATEMENT_PARSERS = {
        TokenType.ALTER: lambda self: self._parse_alter(),
        TokenType.BEGIN: lambda self: self._parse_transaction(),
        TokenType.CACHE: lambda self: self._parse_cache(),
        TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(),
        TokenType.COMMENT: lambda self: self._parse_comment(),
        TokenType.CREATE: lambda self: self._parse_create(),
        TokenType.DELETE: lambda self: self._parse_delete(),
        TokenType.DESC: lambda self: self._parse_describe(),
        TokenType.DESCRIBE: lambda self: self._parse_describe(),
        TokenType.DROP: lambda self: self._parse_drop(),
        TokenType.END: lambda self: self._parse_commit_or_rollback(),
        TokenType.FROM: lambda self: exp.select("*").from_(
            t.cast(exp.From, self._parse_from(skip_from_token=True))
        ),
        TokenType.INSERT: lambda self: self._parse_insert(),
        TokenType.LOAD: lambda self: self._parse_load(),
        TokenType.MERGE: lambda self: self._parse_merge(),
        TokenType.PIVOT: lambda self: self._parse_simplified_pivot(),
        TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()),
        TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(),
        TokenType.SET: lambda self: self._parse_set(),
        TokenType.UNCACHE: lambda self: self._parse_uncache(),
        TokenType.UPDATE: lambda self: self._parse_update(),
        TokenType.USE: lambda self: self.expression(
            exp.Use,
            kind=self._match_texts(("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA"))
            and exp.var(self._prev.text),
            this=self._parse_table(schema=False),
        ),
    }

    UNARY_PARSERS = {
        TokenType.PLUS: lambda self: self._parse_unary(),  # Unary + is handled as a no-op
        TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()),
        TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()),
        TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()),
    }

    PRIMARY_PARSERS = {
        TokenType.STRING: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=True
        ),
        TokenType.NUMBER: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=False
        ),
        TokenType.STAR: lambda self, _: self.expression(
            exp.Star,
            **{"except": self._parse_except(), "replace": self._parse_replace()},
        ),
        TokenType.NULL: lambda self, _: self.expression(exp.Null),
        TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True),
        TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False),
        TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text),
        TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text),
        TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text),
        TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token),
        TokenType.NATIONAL_STRING: lambda self, token: self.expression(
            exp.National, this=token.text
        ),
        TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text),
        TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(),
    }

    PLACEHOLDER_PARSERS = {
        TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder),
        TokenType.PARAMETER: lambda self: self._parse_parameter(),
        TokenType.COLON: lambda self: self.expression(exp.Placeholder, this=self._prev.text)
        if self._match_set((TokenType.NUMBER, TokenType.VAR))
        else None,
    }
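`PRIMARY_PARSERS` turn single tokens into leaf nodes, while `PLACEHOLDER_PARSERS` cover bind-style parameters; the `COLON` entry, for instance, only yields a placeholder when a number or variable token follows. A sketch, assuming the default dialect tokenizes `:name` as COLON + VAR:

```python
import sqlglot
from sqlglot import exp

select = sqlglot.parse_one("SELECT :name")
placeholder = select.find(exp.Placeholder)
print(repr(placeholder))  # Placeholder(this=name)
```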
    RANGE_PARSERS = {
        TokenType.BETWEEN: lambda self, this: self._parse_between(this),
        TokenType.GLOB: binary_range_parser(exp.Glob),
        TokenType.ILIKE: binary_range_parser(exp.ILike),
        TokenType.IN: lambda self, this: self._parse_in(this),
        TokenType.IRLIKE: binary_range_parser(exp.RegexpILike),
        TokenType.IS: lambda self, this: self._parse_is(this),
        TokenType.LIKE: binary_range_parser(exp.Like),
        TokenType.OVERLAPS: binary_range_parser(exp.Overlaps),
        TokenType.RLIKE: binary_range_parser(exp.RegexpLike),
        TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo),
    }
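`RANGE_PARSERS` handle postfix predicates on an already-parsed operand. `BETWEEN` and `IN` get bespoke handlers, while the LIKE family shares `binary_range_parser`, whose `_parse_escape` wrapper also captures a trailing ESCAPE clause. Illustratively, with the default dialect:

```python
import sqlglot
from sqlglot import exp

assert isinstance(sqlglot.parse_one("x BETWEEN 1 AND 2"), exp.Between)

node = sqlglot.parse_one("x LIKE 'a!%' ESCAPE '!'")
assert isinstance(node, exp.Escape)  # the exp.Like sits in node.this
```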
    PROPERTY_PARSERS: t.Dict[str, t.Callable] = {
        "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty),
        "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty),
        "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(),
        "CHARACTER SET": lambda self: self._parse_character_set(),
        "CHECKSUM": lambda self: self._parse_checksum(),
        "CLUSTER": lambda self: self._parse_cluster(),
        "COLLATE": lambda self: self._parse_property_assignment(exp.CollateProperty),
        "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty),
        "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs),
        "DEFINER": lambda self: self._parse_definer(),
        "DETERMINISTIC": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "DISTKEY": lambda self: self._parse_distkey(),
        "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty),
        "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty),
        "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty),
        "EXTERNAL": lambda self: self.expression(exp.ExternalProperty),
        "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs),
        "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "FREESPACE": lambda self: self._parse_freespace(),
        "IMMUTABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs),
        "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty),
        "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"),
        "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"),
        "LIKE": lambda self: self._parse_create_like(),
        "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty),
        "LOCK": lambda self: self._parse_locking(),
        "LOCKING": lambda self: self._parse_locking(),
        "LOG": lambda self, **kwargs: self._parse_log(**kwargs),
        "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty),
        "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs),
        "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True),
        "NO": lambda self: self._parse_no_property(),
        "ON": lambda self: self._parse_on_property(),
        "ORDER BY": lambda self: self._parse_order(skip_order_token=True),
        "PARTITION BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED_BY": lambda self: self._parse_partitioned_by(),
        "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True),
        "RANGE": lambda self: self._parse_dict_range(this="RANGE"),
        "RETURNS": lambda self: self._parse_returns(),
        "ROW": lambda self: self._parse_row(),
        "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty),
        "SET": lambda self: self.expression(exp.SetProperty, multi=False),
        "SETTINGS": lambda self: self.expression(
            exp.SettingsProperty, expressions=self._parse_csv(self._parse_set_item)
        ),
        "SORTKEY": lambda self: self._parse_sortkey(),
        "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"),
        "STABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("STABLE")
        ),
        "STORED": lambda self: self._parse_stored(),
        "TBLPROPERTIES": lambda self: self._parse_wrapped_csv(self._parse_property),
        "TEMP": lambda self: self.expression(exp.TemporaryProperty),
        "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty),
        "TO": lambda self: self._parse_to_table(),
        "TRANSIENT": lambda self: self.expression(exp.TransientProperty),
        "TTL": lambda self: self._parse_ttl(),
        "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "VOLATILE": lambda self: self._parse_volatile_property(),
        "WITH": lambda self: self._parse_with_property(),
    }

    CONSTRAINT_PARSERS = {
        "AUTOINCREMENT": lambda self: self._parse_auto_increment(),
        "AUTO_INCREMENT": lambda self: self._parse_auto_increment(),
        "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False),
        "CHARACTER SET": lambda self: self.expression(
            exp.CharacterSetColumnConstraint, this=self._parse_var_or_string()
        ),
        "CHECK": lambda self: self.expression(
            exp.CheckColumnConstraint, this=self._parse_wrapped(self._parse_conjunction)
        ),
        "COLLATE": lambda self: self.expression(
            exp.CollateColumnConstraint, this=self._parse_var()
        ),
        "COMMENT": lambda self: self.expression(
            exp.CommentColumnConstraint, this=self._parse_string()
        ),
        "COMPRESS": lambda self: self._parse_compress(),
        "DEFAULT": lambda self: self.expression(
            exp.DefaultColumnConstraint, this=self._parse_bitwise()
        ),
        "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()),
        "FOREIGN KEY": lambda self: self._parse_foreign_key(),
        "FORMAT": lambda self: self.expression(
            exp.DateFormatColumnConstraint, this=self._parse_var_or_string()
        ),
        "GENERATED": lambda self: self._parse_generated_as_identity(),
        "IDENTITY": lambda self: self._parse_auto_increment(),
        "INLINE": lambda self: self._parse_inline(),
        "LIKE": lambda self: self._parse_create_like(),
        "NOT": lambda self: self._parse_not_constraint(),
        "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True),
        "ON": lambda self: self._match(TokenType.UPDATE)
        and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function()),
        "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()),
        "PRIMARY KEY": lambda self: self._parse_primary_key(),
        "REFERENCES": lambda self: self._parse_references(match=False),
        "TITLE": lambda self: self.expression(
            exp.TitleColumnConstraint, this=self._parse_var_or_string()
        ),
        "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]),
        "UNIQUE": lambda self: self._parse_unique(),
        "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint),
    }
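`PROPERTY_PARSERS` and `CONSTRAINT_PARSERS` are keyed by uppercase text rather than token type, since most of these words are ordinary identifiers elsewhere. A sketch with the MySQL dialect, where an ENGINE clause becomes an `exp.EngineProperty` and column options become `*ColumnConstraint` nodes:

```python
import sqlglot
from sqlglot import exp

create = sqlglot.parse_one(
    "CREATE TABLE t (x INT NOT NULL DEFAULT 1) ENGINE=InnoDB", read="mysql"
)
assert create.find(exp.EngineProperty) is not None
assert create.find(exp.DefaultColumnConstraint) is not None
```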
    ALTER_PARSERS = {
        "ADD": lambda self: self._parse_alter_table_add(),
        "ALTER": lambda self: self._parse_alter_table_alter(),
        "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()),
        "DROP": lambda self: self._parse_alter_table_drop(),
        "RENAME": lambda self: self._parse_alter_table_rename(),
    }

    SCHEMA_UNNAMED_CONSTRAINTS = {"CHECK", "FOREIGN KEY", "LIKE", "PRIMARY KEY", "UNIQUE"}

    NO_PAREN_FUNCTION_PARSERS = {
        TokenType.ANY: lambda self: self.expression(exp.Any, this=self._parse_bitwise()),
        TokenType.CASE: lambda self: self._parse_case(),
        TokenType.IF: lambda self: self._parse_if(),
        TokenType.NEXT_VALUE_FOR: lambda self: self.expression(
            exp.NextValueFor,
            this=self._parse_column(),
            order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order),
        ),
    }

    FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"}

    FUNCTION_PARSERS: t.Dict[str, t.Callable] = {
        "CAST": lambda self: self._parse_cast(self.STRICT_CAST),
        "CONCAT": lambda self: self._parse_concat(),
        "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST),
        "DECODE": lambda self: self._parse_decode(),
        "EXTRACT": lambda self: self._parse_extract(),
        "JSON_OBJECT": lambda self: self._parse_json_object(),
        "LOG": lambda self: self._parse_logarithm(),
        "MATCH": lambda self: self._parse_match_against(),
        "OPENJSON": lambda self: self._parse_open_json(),
        "POSITION": lambda self: self._parse_position(),
        "SAFE_CAST": lambda self: self._parse_cast(False),
        "STRING_AGG": lambda self: self._parse_string_agg(),
        "SUBSTRING": lambda self: self._parse_substring(),
        "TRIM": lambda self: self._parse_trim(),
        "TRY_CAST": lambda self: self._parse_cast(False),
        "TRY_CONVERT": lambda self: self._parse_convert(False),
    }
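`FUNCTION_PARSERS` intercepts functions whose argument lists need special grammar (CAST's `AS`, EXTRACT's `FROM`, and so on) before generic argument parsing applies; `TRY_CAST` and `SAFE_CAST` simply reuse `_parse_cast` with strictness disabled. For instance:

```python
import sqlglot
from sqlglot import exp

assert isinstance(sqlglot.parse_one("CAST(x AS INT)"), exp.Cast)
assert isinstance(sqlglot.parse_one("TRY_CAST(x AS INT)"), exp.TryCast)
```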
    QUERY_MODIFIER_PARSERS = {
        "joins": lambda self: list(iter(self._parse_join, None)),
        "laterals": lambda self: list(iter(self._parse_lateral, None)),
        "match": lambda self: self._parse_match_recognize(),
        "where": lambda self: self._parse_where(),
        "group": lambda self: self._parse_group(),
        "having": lambda self: self._parse_having(),
        "qualify": lambda self: self._parse_qualify(),
        "windows": lambda self: self._parse_window_clause(),
        "order": lambda self: self._parse_order(),
        "limit": lambda self: self._parse_limit(),
        "offset": lambda self: self._parse_offset(),
        "locks": lambda self: self._parse_locks(),
        "sample": lambda self: self._parse_table_sample(as_modifier=True),
    }

    SET_PARSERS = {
        "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"),
        "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"),
        "SESSION": lambda self: self._parse_set_item_assignment("SESSION"),
        "TRANSACTION": lambda self: self._parse_set_transaction(),
    }

    SHOW_PARSERS: t.Dict[str, t.Callable] = {}

    TYPE_LITERAL_PARSERS: t.Dict[exp.DataType.Type, t.Callable] = {}

    MODIFIABLES = (exp.Subquery, exp.Subqueryable, exp.Table)

    DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN}

    PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE}

    TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"}
    TRANSACTION_CHARACTERISTICS = {
        "ISOLATION LEVEL REPEATABLE READ",
        "ISOLATION LEVEL READ COMMITTED",
        "ISOLATION LEVEL READ UNCOMMITTED",
        "ISOLATION LEVEL SERIALIZABLE",
        "READ WRITE",
        "READ ONLY",
    }

    INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"}

    CLONE_KINDS = {"TIMESTAMP", "OFFSET", "STATEMENT"}

    WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS}
    WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER}
    WINDOW_SIDES = {"FOLLOWING", "PRECEDING"}

    ADD_CONSTRAINT_TOKENS = {TokenType.CONSTRAINT, TokenType.PRIMARY_KEY, TokenType.FOREIGN_KEY}

    STRICT_CAST = True

    CONCAT_NULL_OUTPUTS_STRING = False  # A NULL arg in CONCAT yields NULL by default

    CONVERT_TYPE_FIRST = False

    PREFIXED_PIVOT_COLUMNS = False
    IDENTIFY_PIVOT_STRINGS = False

    LOG_BASE_FIRST = True
    LOG_DEFAULTS_TO_LN = False

    __slots__ = (
        "error_level",
        "error_message_context",
        "max_errors",
        "sql",
        "errors",
        "_tokens",
        "_index",
        "_curr",
        "_next",
        "_prev",
        "_prev_comments",
    )

    # Autofilled
    INDEX_OFFSET: int = 0
    UNNEST_COLUMN_ONLY: bool = False
    ALIAS_POST_TABLESAMPLE: bool = False
    STRICT_STRING_CONCAT = False
    NULL_ORDERING: str = "nulls_are_small"
    SHOW_TRIE: t.Dict = {}
    SET_TRIE: t.Dict = {}
    FORMAT_MAPPING: t.Dict[str, str] = {}
    FORMAT_TRIE: t.Dict = {}
    TIME_MAPPING: t.Dict[str, str] = {}
    TIME_TRIE: t.Dict = {}

    def __init__(
        self,
        error_level: t.Optional[ErrorLevel] = None,
        error_message_context: int = 100,
        max_errors: int = 3,
    ):
        self.error_level = error_level or ErrorLevel.IMMEDIATE
        self.error_message_context = error_message_context
        self.max_errors = max_errors
        self.reset()

    def reset(self):
        self.sql = ""
        self.errors = []
        self._tokens = []
        self._index = 0
        self._curr = None
        self._next = None
        self._prev = None
        self._prev_comments = None

    def parse(
        self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens and returns a list of syntax trees, one tree
        per parsed SQL statement.

        Args:
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The list of the produced syntax trees.
        """
        return self._parse(
            parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
        )
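The `error_level` passed to `__init__` decides how `raise_error` behaves: IMMEDIATE raises at the offending token, RAISE accumulates and raises once per statement from `check_errors`, and WARN only logs. A sketch of the RAISE mode, assuming `"SELECT * FROM"` (missing table name) fails to parse:

```python
from sqlglot.errors import ErrorLevel, ParseError
from sqlglot.parser import Parser
from sqlglot.tokens import Tokenizer

bad = "SELECT * FROM"
try:
    Parser(error_level=ErrorLevel.RAISE).parse(Tokenizer().tokenize(bad), bad)
except ParseError as e:
    print(e.errors[0]["description"])  # e.g. a missing-table message
```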
    def parse_into(
        self,
        expression_types: exp.IntoType,
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens into a given Expression type. If a collection of Expression
        types is given instead, this method will try to parse the token list into each one
        of them, stopping at the first for which the parsing succeeds.

        Args:
            expression_types: The expression type(s) to try and parse the token list into.
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The target Expression.
        """
        errors = []
        for expression_type in ensure_list(expression_types):
            parser = self.EXPRESSION_PARSERS.get(expression_type)
            if not parser:
                raise TypeError(f"No parser registered for {expression_type}")

            try:
                return self._parse(parser, raw_tokens, sql)
            except ParseError as e:
                e.errors[0]["into_expression"] = expression_type
                errors.append(e)

        raise ParseError(
            f"Failed to parse '{sql or raw_tokens}' into {expression_types}",
            errors=merge_errors(errors),
        ) from errors[-1]
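`parse_into` drives the `EXPRESSION_PARSERS` table: each target type maps to the private method that can parse it, and the errors of every failed attempt are merged into the final `ParseError`. A sketch:

```python
from sqlglot import exp
from sqlglot.parser import Parser
from sqlglot.tokens import Tokenizer

sql = "WHERE x > 1"
where = Parser().parse_into(exp.Where, Tokenizer().tokenize(sql), sql)[0]
assert isinstance(where, exp.Where)
```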
    def _parse(
        self,
        parse_method: t.Callable[[Parser], t.Optional[exp.Expression]],
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        self.reset()
        self.sql = sql or ""

        total = len(raw_tokens)
        chunks: t.List[t.List[Token]] = [[]]

        for i, token in enumerate(raw_tokens):
            if token.token_type == TokenType.SEMICOLON:
                if i < total - 1:
                    chunks.append([])
            else:
                chunks[-1].append(token)

        expressions = []

        for tokens in chunks:
            self._index = -1
            self._tokens = tokens
            self._advance()

            expressions.append(parse_method(self))

            if self._index < len(self._tokens):
                self.raise_error("Invalid expression / Unexpected token")

            self.check_errors()

        return expressions

    def check_errors(self) -> None:
        """Logs or raises any found errors, depending on the chosen error level setting."""
        if self.error_level == ErrorLevel.WARN:
            for error in self.errors:
                logger.error(str(error))
        elif self.error_level == ErrorLevel.RAISE and self.errors:
            raise ParseError(
                concat_messages(self.errors, self.max_errors),
                errors=merge_errors(self.errors),
            )

    def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
        """
        Appends an error in the list of recorded errors or raises it, depending on the chosen
        error level setting.
        """
        token = token or self._curr or self._prev or Token.string("")
        start = token.start
        end = token.end + 1
        start_context = self.sql[max(start - self.error_message_context, 0) : start]
        highlight = self.sql[start:end]
        end_context = self.sql[end : end + self.error_message_context]

        error = ParseError.new(
            f"{message}. Line {token.line}, Col: {token.col}.\n"
            f"  {start_context}\033[4m{highlight}\033[0m{end_context}",
            description=message,
            line=token.line,
            col=token.col,
            start_context=start_context,
            highlight=highlight,
            end_context=end_context,
        )

        if self.error_level == ErrorLevel.IMMEDIATE:
            raise error

        self.errors.append(error)

    def expression(
        self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs
    ) -> E:
        """
        Creates a new, validated Expression.

        Args:
            exp_class: The expression class to instantiate.
            comments: An optional list of comments to attach to the expression.
            kwargs: The arguments to set for the expression along with their respective values.

        Returns:
            The target expression.
        """
        instance = exp_class(**kwargs)
        instance.add_comments(comments) if comments else self._add_comments(instance)
        return self.validate_expression(instance)

    def _add_comments(self, expression: t.Optional[exp.Expression]) -> None:
        if expression and self._prev_comments:
            expression.add_comments(self._prev_comments)
            self._prev_comments = None

    def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E:
        """
        Validates an Expression, making sure that all its mandatory arguments are set.

        Args:
            expression: The expression to validate.
            args: An optional list of items that was used to instantiate the expression, if it's a Func.

        Returns:
            The validated expression.
        """
        if self.error_level != ErrorLevel.IGNORE:
            for error_message in expression.error_messages(args):
                self.raise_error(error_message)

        return expression
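Every node the parser creates is funneled through `expression` and `validate_expression`, so mandatory-argument violations surface according to `error_level`; with the default `ErrorLevel.IMMEDIATE` they raise on the spot. A sketch:

```python
from sqlglot import exp
from sqlglot.errors import ParseError
from sqlglot.parser import Parser

parser = Parser()  # error_level defaults to ErrorLevel.IMMEDIATE
try:
    parser.expression(exp.EQ, this=exp.column("a"))  # required `expression` arg missing
except ParseError as e:
    print(e)
```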
    def _find_sql(self, start: Token, end: Token) -> str:
        return self.sql[start.start : end.end + 1]

    def _advance(self, times: int = 1) -> None:
        self._index += times
        self._curr = seq_get(self._tokens, self._index)
        self._next = seq_get(self._tokens, self._index + 1)

        if self._index > 0:
            self._prev = self._tokens[self._index - 1]
            self._prev_comments = self._prev.comments
        else:
            self._prev = None
            self._prev_comments = None

    def _retreat(self, index: int) -> None:
        if index != self._index:
            self._advance(index - self._index)

    def _parse_command(self) -> exp.Command:
        return self.expression(exp.Command, this=self._prev.text, expression=self._parse_string())

    def _parse_comment(self, allow_exists: bool = True) -> exp.Expression:
        start = self._prev
        exists = self._parse_exists() if allow_exists else None

        self._match(TokenType.ON)

        kind = self._match_set(self.CREATABLES) and self._prev
        if not kind:
            return self._parse_as_command(start)

        if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=kind.token_type)
        elif kind.token_type == TokenType.TABLE:
            this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS)
        elif kind.token_type == TokenType.COLUMN:
            this = self._parse_column()
        else:
            this = self._parse_id_var()

        self._match(TokenType.IS)

        return self.expression(
            exp.Comment, this=this, kind=kind.text, expression=self._parse_string(), exists=exists
        )

    def _parse_to_table(
        self,
    ) -> exp.ToTableProperty:
        table = self._parse_table_parts(schema=True)
        return self.expression(exp.ToTableProperty, this=table)

    # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl
    def _parse_ttl(self) -> exp.Expression:
        def _parse_ttl_action() -> t.Optional[exp.Expression]:
            this = self._parse_bitwise()

            if self._match_text_seq("DELETE"):
                return self.expression(exp.MergeTreeTTLAction, this=this, delete=True)
            if self._match_text_seq("RECOMPRESS"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise()
                )
            if self._match_text_seq("TO", "DISK"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string()
                )
            if self._match_text_seq("TO", "VOLUME"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string()
                )

            return this

        expressions = self._parse_csv(_parse_ttl_action)
        where = self._parse_where()
        group = self._parse_group()

        aggregates = None
        if group and self._match(TokenType.SET):
            aggregates = self._parse_csv(self._parse_set_item)

        return self.expression(
            exp.MergeTreeTTL,
            expressions=expressions,
            where=where,
            group=group,
            aggregates=aggregates,
        )

    def _parse_statement(self) -> t.Optional[exp.Expression]:
        if self._curr is None:
            return None

        if self._match_set(self.STATEMENT_PARSERS):
            return self.STATEMENT_PARSERS[self._prev.token_type](self)

        if self._match_set(Tokenizer.COMMANDS):
            return self._parse_command()

        expression = self._parse_expression()
        expression = self._parse_set_operations(expression) if expression else self._parse_select()
        return self._parse_query_modifiers(expression)

    def _parse_drop(self) -> exp.Drop | exp.Command:
        start = self._prev
        temporary = self._match(TokenType.TEMPORARY)
        materialized = self._match_text_seq("MATERIALIZED")

        kind = self._match_set(self.CREATABLES) and self._prev.text
        if not kind:
            return self._parse_as_command(start)

        return self.expression(
            exp.Drop,
            exists=self._parse_exists(),
            this=self._parse_table(schema=True),
            kind=kind,
            temporary=temporary,
            materialized=materialized,
            cascade=self._match_text_seq("CASCADE"),
            constraints=self._match_text_seq("CONSTRAINTS"),
            purge=self._match_text_seq("PURGE"),
        )

    def _parse_exists(self, not_: bool = False) -> t.Optional[bool]:
        return (
            self._match(TokenType.IF)
            and (not not_ or self._match(TokenType.NOT))
            and self._match(TokenType.EXISTS)
        )
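`_parse_exists` backs the `IF [NOT] EXISTS` flags used throughout the DDL methods, and `_parse_drop` shows the typical fallback shape: anything that is not a known creatable is re-parsed as a generic `exp.Command`. End to end:

```python
import sqlglot

drop = sqlglot.parse_one("DROP TABLE IF EXISTS db.t CASCADE")
print(drop.args["kind"], drop.args["exists"], drop.args["cascade"])  # TABLE True True
```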
    def _parse_create(self) -> exp.Create | exp.Command:
        # Note: this can't be None because we've matched a statement parser
        start = self._prev
        replace = start.text.upper() == "REPLACE" or self._match_pair(
            TokenType.OR, TokenType.REPLACE
        )
        unique = self._match(TokenType.UNIQUE)

        if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False):
            self._advance()

        properties = None
        create_token = self._match_set(self.CREATABLES) and self._prev

        if not create_token:
            # exp.Properties.Location.POST_CREATE
            properties = self._parse_properties()
            create_token = self._match_set(self.CREATABLES) and self._prev

            if not properties or not create_token:
                return self._parse_as_command(start)

        exists = self._parse_exists(not_=True)
        this = None
        expression = None
        indexes = None
        no_schema_binding = None
        begin = None
        clone = None

        def extend_props(temp_props: t.Optional[exp.Properties]) -> None:
            nonlocal properties
            if properties and temp_props:
                properties.expressions.extend(temp_props.expressions)
            elif temp_props:
                properties = temp_props

        if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=create_token.token_type)

            # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature)
            extend_props(self._parse_properties())

            self._match(TokenType.ALIAS)
            begin = self._match(TokenType.BEGIN)
            return_ = self._match_text_seq("RETURN")
            expression = self._parse_statement()

            if return_:
                expression = self.expression(exp.Return, this=expression)
        elif create_token.token_type == TokenType.INDEX:
            this = self._parse_index(index=self._parse_id_var())
        elif create_token.token_type in self.DB_CREATABLES:
            table_parts = self._parse_table_parts(schema=True)

            # exp.Properties.Location.POST_NAME
            self._match(TokenType.COMMA)
            extend_props(self._parse_properties(before=True))

            this = self._parse_schema(this=table_parts)

            # exp.Properties.Location.POST_SCHEMA and POST_WITH
            extend_props(self._parse_properties())

            self._match(TokenType.ALIAS)
            if not self._match_set(self.DDL_SELECT_TOKENS, advance=False):
                # exp.Properties.Location.POST_ALIAS
                extend_props(self._parse_properties())

            expression = self._parse_ddl_select()

            if create_token.token_type == TokenType.TABLE:
                indexes = []
                while True:
                    index = self._parse_index()

                    # exp.Properties.Location.POST_EXPRESSION and POST_INDEX
                    extend_props(self._parse_properties())

                    if not index:
                        break
                    else:
                        self._match(TokenType.COMMA)
                        indexes.append(index)
            elif create_token.token_type == TokenType.VIEW:
                if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"):
                    no_schema_binding = True

            if self._match_text_seq("CLONE"):
                clone = self._parse_table(schema=True)
                when = self._match_texts({"AT", "BEFORE"}) and self._prev.text.upper()
                clone_kind = (
                    self._match(TokenType.L_PAREN)
                    and self._match_texts(self.CLONE_KINDS)
                    and self._prev.text.upper()
                )
                clone_expression = self._match(TokenType.FARROW) and self._parse_bitwise()
                self._match(TokenType.R_PAREN)
                clone = self.expression(
                    exp.Clone, this=clone, when=when, kind=clone_kind, expression=clone_expression
                )

        return self.expression(
            exp.Create,
            this=this,
            kind=create_token.text,
            replace=replace,
            unique=unique,
            expression=expression,
            exists=exists,
            properties=properties,
            indexes=indexes,
            no_schema_binding=no_schema_binding,
            begin=begin,
            clone=clone,
        )
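`_parse_create` collects properties from several positions (POST_CREATE, POST_NAME, POST_SCHEMA, POST_ALIAS, ...) and merges them into a single `exp.Properties` via `extend_props`. For example:

```python
import sqlglot
from sqlglot import exp

create = sqlglot.parse_one("CREATE OR REPLACE TEMPORARY VIEW v AS SELECT 1")
print(create.args["kind"], create.args["replace"])  # VIEW True
assert create.find(exp.TemporaryProperty) is not None
```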
    def _parse_property_before(self) -> t.Optional[exp.Expression]:
        # only used for teradata currently
        self._match(TokenType.COMMA)

        kwargs = {
            "no": self._match_text_seq("NO"),
            "dual": self._match_text_seq("DUAL"),
            "before": self._match_text_seq("BEFORE"),
            "default": self._match_text_seq("DEFAULT"),
            "local": (self._match_text_seq("LOCAL") and "LOCAL")
            or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"),
            "after": self._match_text_seq("AFTER"),
            "minimum": self._match_texts(("MIN", "MINIMUM")),
            "maximum": self._match_texts(("MAX", "MAXIMUM")),
        }

        if self._match_texts(self.PROPERTY_PARSERS):
            parser = self.PROPERTY_PARSERS[self._prev.text.upper()]
            try:
                return parser(self, **{k: v for k, v in kwargs.items() if v})
            except TypeError:
                self.raise_error(f"Cannot parse property '{self._prev.text}'")

        return None

    def _parse_property(self) -> t.Optional[exp.Expression]:
        if self._match_texts(self.PROPERTY_PARSERS):
            return self.PROPERTY_PARSERS[self._prev.text.upper()](self)

        if self._match_pair(TokenType.DEFAULT, TokenType.CHARACTER_SET):
            return self._parse_character_set(default=True)

        if self._match_text_seq("COMPOUND", "SORTKEY"):
            return self._parse_sortkey(compound=True)

        if self._match_text_seq("SQL", "SECURITY"):
            return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER"))

        assignment = self._match_pair(
            TokenType.VAR, TokenType.EQ, advance=False
        ) or self._match_pair(TokenType.STRING, TokenType.EQ, advance=False)

        if assignment:
            key = self._parse_var_or_string()
            self._match(TokenType.EQ)
            return self.expression(exp.Property, this=key, value=self._parse_column())

        return None

    def _parse_stored(self) -> exp.FileFormatProperty:
        self._match(TokenType.ALIAS)

        input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None
        output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None

        return self.expression(
            exp.FileFormatProperty,
            this=self.expression(
                exp.InputOutputFormat, input_format=input_format, output_format=output_format
            )
            if input_format or output_format
            else self._parse_var_or_string() or self._parse_number() or self._parse_id_var(),
        )

    def _parse_property_assignment(self, exp_class: t.Type[E]) -> E:
        self._match(TokenType.EQ)
        self._match(TokenType.ALIAS)
        return self.expression(exp_class, this=self._parse_field())

    def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]:
        properties = []
        while True:
            if before:
                prop = self._parse_property_before()
            else:
                prop = self._parse_property()

            if not prop:
                break
            for p in ensure_list(prop):
                properties.append(p)

        if properties:
            return self.expression(exp.Properties, expressions=properties)

        return None

    def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty:
        return self.expression(
            exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION")
        )

    def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty:
        if self._index >= 2:
            pre_volatile_token = self._tokens[self._index - 2]
        else:
            pre_volatile_token = None

        if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS:
            return exp.VolatileProperty()

        return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE"))

    def _parse_with_property(
        self,
    ) -> t.Optional[exp.Expression] | t.List[t.Optional[exp.Expression]]:
        self._match(TokenType.WITH)
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_property)

        if self._match_text_seq("JOURNAL"):
            return self._parse_withjournaltable()

        if self._match_text_seq("DATA"):
            return self._parse_withdata(no=False)
        elif self._match_text_seq("NO", "DATA"):
            return self._parse_withdata(no=True)

        if not self._next:
            return None

        return self._parse_withisolatedloading()

    # https://dev.mysql.com/doc/refman/8.0/en/create-view.html
    def _parse_definer(self) -> t.Optional[exp.DefinerProperty]:
        self._match(TokenType.EQ)

        user = self._parse_id_var()
        self._match(TokenType.PARAMETER)
        host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text)

        if not user or not host:
            return None

        return exp.DefinerProperty(this=f"{user}@{host}")
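Properties are recognized either through the keyword table or through the generic `key = value` assignment branch at the end of `_parse_property`; multi-word forms such as the MySQL-style security clause are matched by text sequence. A hedged sketch (assuming the base parser accepts this clause without a dialect):

```python
import sqlglot
from sqlglot import exp

create = sqlglot.parse_one("CREATE SQL SECURITY DEFINER VIEW v AS SELECT 1")
assert create.find(exp.SqlSecurityProperty) is not None
```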
    def _parse_withjournaltable(self) -> exp.WithJournalTableProperty:
        self._match(TokenType.TABLE)
        self._match(TokenType.EQ)
        return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts())

    def _parse_log(self, no: bool = False) -> exp.LogProperty:
        return self.expression(exp.LogProperty, no=no)

    def _parse_journal(self, **kwargs) -> exp.JournalProperty:
        return self.expression(exp.JournalProperty, **kwargs)

    def _parse_checksum(self) -> exp.ChecksumProperty:
        self._match(TokenType.EQ)

        on = None
        if self._match(TokenType.ON):
            on = True
        elif self._match_text_seq("OFF"):
            on = False

        return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT))

    def _parse_cluster(self) -> t.Optional[exp.Cluster]:
        if not self._match_text_seq("BY"):
            self._retreat(self._index - 1)
            return None

        return self.expression(exp.Cluster, expressions=self._parse_csv(self._parse_ordered))

    def _parse_freespace(self) -> exp.FreespaceProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT)
        )

    def _parse_mergeblockratio(
        self, no: bool = False, default: bool = False
    ) -> exp.MergeBlockRatioProperty:
        if self._match(TokenType.EQ):
            return self.expression(
                exp.MergeBlockRatioProperty,
                this=self._parse_number(),
                percent=self._match(TokenType.PERCENT),
            )

        return self.expression(exp.MergeBlockRatioProperty, no=no, default=default)

    def _parse_datablocksize(
        self,
        default: t.Optional[bool] = None,
        minimum: t.Optional[bool] = None,
        maximum: t.Optional[bool] = None,
    ) -> exp.DataBlocksizeProperty:
        self._match(TokenType.EQ)
        size = self._parse_number()

        units = None
        if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")):
            units = self._prev.text

        return self.expression(
            exp.DataBlocksizeProperty,
            size=size,
            units=units,
            default=default,
            minimum=minimum,
            maximum=maximum,
        )

    def _parse_blockcompression(self) -> exp.BlockCompressionProperty:
        self._match(TokenType.EQ)
        always = self._match_text_seq("ALWAYS")
        manual = self._match_text_seq("MANUAL")
        never = self._match_text_seq("NEVER")
        default = self._match_text_seq("DEFAULT")

        autotemp = None
        if self._match_text_seq("AUTOTEMP"):
            autotemp = self._parse_schema()

        return self.expression(
            exp.BlockCompressionProperty,
            always=always,
            manual=manual,
            never=never,
            default=default,
            autotemp=autotemp,
        )

    def _parse_withisolatedloading(self) -> exp.IsolatedLoadingProperty:
        no = self._match_text_seq("NO")
        concurrent = self._match_text_seq("CONCURRENT")
        self._match_text_seq("ISOLATED", "LOADING")
        for_all = self._match_text_seq("FOR", "ALL")
        for_insert = self._match_text_seq("FOR", "INSERT")
        for_none = self._match_text_seq("FOR", "NONE")
        return self.expression(
            exp.IsolatedLoadingProperty,
            no=no,
            concurrent=concurrent,
            for_all=for_all,
            for_insert=for_insert,
            for_none=for_none,
        )
    def _parse_locking(self) -> exp.LockingProperty:
        if self._match(TokenType.TABLE):
            kind = "TABLE"
        elif self._match(TokenType.VIEW):
            kind = "VIEW"
        elif self._match(TokenType.ROW):
            kind = "ROW"
        elif self._match_text_seq("DATABASE"):
            kind = "DATABASE"
        else:
            kind = None

        if kind in ("DATABASE", "TABLE", "VIEW"):
            this = self._parse_table_parts()
        else:
            this = None

        if self._match(TokenType.FOR):
            for_or_in = "FOR"
        elif self._match(TokenType.IN):
            for_or_in = "IN"
        else:
            for_or_in = None

        if self._match_text_seq("ACCESS"):
            lock_type = "ACCESS"
        elif self._match_texts(("EXCL", "EXCLUSIVE")):
            lock_type = "EXCLUSIVE"
        elif self._match_text_seq("SHARE"):
            lock_type = "SHARE"
        elif self._match_text_seq("READ"):
            lock_type = "READ"
        elif self._match_text_seq("WRITE"):
            lock_type = "WRITE"
        elif self._match_text_seq("CHECKSUM"):
            lock_type = "CHECKSUM"
        else:
            lock_type = None

        override = self._match_text_seq("OVERRIDE")

        return self.expression(
            exp.LockingProperty,
            this=this,
            kind=kind,
            for_or_in=for_or_in,
            lock_type=lock_type,
            override=override,
        )

    def _parse_partition_by(self) -> t.List[t.Optional[exp.Expression]]:
        if self._match(TokenType.PARTITION_BY):
            return self._parse_csv(self._parse_conjunction)
        return []

    def _parse_partitioned_by(self) -> exp.PartitionedByProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.PartitionedByProperty,
            this=self._parse_schema() or self._parse_bracket(self._parse_field()),
        )
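`_parse_partitioned_by` accepts either a column schema or a bracketed field, which is how Hive-style partition clauses are captured. A sketch with the Hive dialect:

```python
import sqlglot
from sqlglot import exp

create = sqlglot.parse_one(
    "CREATE TABLE t (x INT) PARTITIONED BY (y STRING)", read="hive"
)
assert create.find(exp.PartitionedByProperty) is not None
```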
    def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty:
        if self._match_text_seq("AND", "STATISTICS"):
            statistics = True
        elif self._match_text_seq("AND", "NO", "STATISTICS"):
            statistics = False
        else:
            statistics = None

        return self.expression(exp.WithDataProperty, no=no, statistics=statistics)

    def _parse_no_property(self) -> t.Optional[exp.NoPrimaryIndexProperty]:
        if self._match_text_seq("PRIMARY", "INDEX"):
            return exp.NoPrimaryIndexProperty()
        return None

    def _parse_on_property(self) -> t.Optional[exp.Expression]:
        if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"):
            return exp.OnCommitProperty()
        elif self._match_text_seq("COMMIT", "DELETE", "ROWS"):
            return exp.OnCommitProperty(delete=True)
        return None

    def _parse_distkey(self) -> exp.DistKeyProperty:
        return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var))

    def _parse_create_like(self) -> t.Optional[exp.LikeProperty]:
        table = self._parse_table(schema=True)

        options = []
        while self._match_texts(("INCLUDING", "EXCLUDING")):
            this = self._prev.text.upper()

            id_var = self._parse_id_var()
            if not id_var:
                return None

            options.append(
                self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper()))
            )

        return self.expression(exp.LikeProperty, this=table, expressions=options)

    def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty:
        return self.expression(
            exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound
        )

    def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default
        )

    def _parse_returns(self) -> exp.ReturnsProperty:
        value: t.Optional[exp.Expression]
        is_table = self._match(TokenType.TABLE)

        if is_table:
            if self._match(TokenType.LT):
                value = self.expression(
                    exp.Schema,
                    this="TABLE",
                    expressions=self._parse_csv(self._parse_struct_types),
                )
                if not self._match(TokenType.GT):
                    self.raise_error("Expecting >")
            else:
                value = self._parse_schema(exp.var("TABLE"))
        else:
            value = self._parse_types()

        return self.expression(exp.ReturnsProperty, this=value, is_table=is_table)

    def _parse_describe(self) -> exp.Describe:
        kind = self._match_set(self.CREATABLES) and self._prev.text
        this = self._parse_table()
        return self.expression(exp.Describe, this=this, kind=kind)

    def _parse_insert(self) -> exp.Insert:
        overwrite = self._match(TokenType.OVERWRITE)
        local = self._match_text_seq("LOCAL")
        alternative = None

        if self._match_text_seq("DIRECTORY"):
            this: t.Optional[exp.Expression] = self.expression(
                exp.Directory,
                this=self._parse_var_or_string(),
                local=local,
                row_format=self._parse_row_format(match_row=True),
            )
        else:
            if self._match(TokenType.OR):
                alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text

            self._match(TokenType.INTO)
            self._match(TokenType.TABLE)
            this = self._parse_table(schema=True)

        return self.expression(
            exp.Insert,
            this=this,
            exists=self._parse_exists(),
            partition=self._parse_partition(),
            expression=self._parse_ddl_select(),
            conflict=self._parse_on_conflict(),
            returning=self._parse_returning(),
            overwrite=overwrite,
            alternative=alternative,
        )

    def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]:
        conflict = self._match_text_seq("ON", "CONFLICT")
        duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY")

        if not conflict and not duplicate:
            return None

        nothing = None
        expressions = None
        key = None
        constraint = None

        if conflict:
            if self._match_text_seq("ON", "CONSTRAINT"):
                constraint = self._parse_id_var()
            else:
                key = self._parse_csv(self._parse_value)

        self._match_text_seq("DO")
        if self._match_text_seq("NOTHING"):
            nothing = True
        else:
            self._match(TokenType.UPDATE)
            expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality)

        return self.expression(
            exp.OnConflict,
            duplicate=duplicate,
            expressions=expressions,
            nothing=nothing,
            key=key,
            constraint=constraint,
        )

    def _parse_returning(self) -> t.Optional[exp.Returning]:
        if not self._match(TokenType.RETURNING):
            return None

        return self.expression(exp.Returning, expressions=self._parse_csv(self._parse_column))
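`_parse_insert` defers the conflict clause to `_parse_on_conflict`, which covers both the Postgres `ON CONFLICT` and the MySQL `ON DUPLICATE KEY` spellings. A hedged sketch with the Postgres dialect:

```python
import sqlglot
from sqlglot import exp

insert = sqlglot.parse_one(
    "INSERT INTO t (x) VALUES (1) ON CONFLICT (x) DO NOTHING", read="postgres"
)
conflict = insert.args["conflict"]
assert isinstance(conflict, exp.OnConflict)
assert conflict.args["nothing"] is True
```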
kwargs["fields"] = self._parse_string() 1740 if self._match_text_seq("ESCAPED", "BY"): 1741 kwargs["escaped"] = self._parse_string() 1742 if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"): 1743 kwargs["collection_items"] = self._parse_string() 1744 if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"): 1745 kwargs["map_keys"] = self._parse_string() 1746 if self._match_text_seq("LINES", "TERMINATED", "BY"): 1747 kwargs["lines"] = self._parse_string() 1748 if self._match_text_seq("NULL", "DEFINED", "AS"): 1749 kwargs["null"] = self._parse_string() 1750 1751 return self.expression(exp.RowFormatDelimitedProperty, **kwargs) # type: ignore 1752 1753 def _parse_load(self) -> exp.LoadData | exp.Command: 1754 if self._match_text_seq("DATA"): 1755 local = self._match_text_seq("LOCAL") 1756 self._match_text_seq("INPATH") 1757 inpath = self._parse_string() 1758 overwrite = self._match(TokenType.OVERWRITE) 1759 self._match_pair(TokenType.INTO, TokenType.TABLE) 1760 1761 return self.expression( 1762 exp.LoadData, 1763 this=self._parse_table(schema=True), 1764 local=local, 1765 overwrite=overwrite, 1766 inpath=inpath, 1767 partition=self._parse_partition(), 1768 input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(), 1769 serde=self._match_text_seq("SERDE") and self._parse_string(), 1770 ) 1771 return self._parse_as_command(self._prev) 1772 1773 def _parse_delete(self) -> exp.Delete: 1774 self._match(TokenType.FROM) 1775 1776 return self.expression( 1777 exp.Delete, 1778 this=self._parse_table(), 1779 using=self._parse_csv(lambda: self._match(TokenType.USING) and self._parse_table()), 1780 where=self._parse_where(), 1781 returning=self._parse_returning(), 1782 ) 1783 1784 def _parse_update(self) -> exp.Update: 1785 return self.expression( 1786 exp.Update, 1787 **{ # type: ignore 1788 "this": self._parse_table(alias_tokens=self.UPDATE_ALIAS_TOKENS), 1789 "expressions": self._match(TokenType.SET) and self._parse_csv(self._parse_equality), 1790 "from": self._parse_from(modifiers=True), 1791 "where": self._parse_where(), 1792 "returning": self._parse_returning(), 1793 }, 1794 ) 1795 1796 def _parse_uncache(self) -> exp.Uncache: 1797 if not self._match(TokenType.TABLE): 1798 self.raise_error("Expecting TABLE after UNCACHE") 1799 1800 return self.expression( 1801 exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True) 1802 ) 1803 1804 def _parse_cache(self) -> exp.Cache: 1805 lazy = self._match_text_seq("LAZY") 1806 self._match(TokenType.TABLE) 1807 table = self._parse_table(schema=True) 1808 1809 options = [] 1810 if self._match_text_seq("OPTIONS"): 1811 self._match_l_paren() 1812 k = self._parse_string() 1813 self._match(TokenType.EQ) 1814 v = self._parse_string() 1815 options = [k, v] 1816 self._match_r_paren() 1817 1818 self._match(TokenType.ALIAS) 1819 return self.expression( 1820 exp.Cache, 1821 this=table, 1822 lazy=lazy, 1823 options=options, 1824 expression=self._parse_select(nested=True), 1825 ) 1826 1827 def _parse_partition(self) -> t.Optional[exp.Partition]: 1828 if not self._match(TokenType.PARTITION): 1829 return None 1830 1831 return self.expression( 1832 exp.Partition, expressions=self._parse_wrapped_csv(self._parse_conjunction) 1833 ) 1834 1835 def _parse_value(self) -> exp.Tuple: 1836 if self._match(TokenType.L_PAREN): 1837 expressions = self._parse_csv(self._parse_conjunction) 1838 self._match_r_paren() 1839 return self.expression(exp.Tuple, expressions=expressions) 1840 1841 # In presto we can have VALUES 1, 2 which results in 
1 column & 2 rows. 1842 # Source: https://prestodb.io/docs/current/sql/values.html 1843 return self.expression(exp.Tuple, expressions=[self._parse_conjunction()]) 1844 1845 def _parse_select( 1846 self, nested: bool = False, table: bool = False, parse_subquery_alias: bool = True 1847 ) -> t.Optional[exp.Expression]: 1848 cte = self._parse_with() 1849 if cte: 1850 this = self._parse_statement() 1851 1852 if not this: 1853 self.raise_error("Failed to parse any statement following CTE") 1854 return cte 1855 1856 if "with" in this.arg_types: 1857 this.set("with", cte) 1858 else: 1859 self.raise_error(f"{this.key} does not support CTE") 1860 this = cte 1861 elif self._match(TokenType.SELECT): 1862 comments = self._prev_comments 1863 1864 hint = self._parse_hint() 1865 all_ = self._match(TokenType.ALL) 1866 distinct = self._match(TokenType.DISTINCT) 1867 1868 kind = ( 1869 self._match(TokenType.ALIAS) 1870 and self._match_texts(("STRUCT", "VALUE")) 1871 and self._prev.text 1872 ) 1873 1874 if distinct: 1875 distinct = self.expression( 1876 exp.Distinct, 1877 on=self._parse_value() if self._match(TokenType.ON) else None, 1878 ) 1879 1880 if all_ and distinct: 1881 self.raise_error("Cannot specify both ALL and DISTINCT after SELECT") 1882 1883 limit = self._parse_limit(top=True) 1884 expressions = self._parse_csv(self._parse_expression) 1885 1886 this = self.expression( 1887 exp.Select, 1888 kind=kind, 1889 hint=hint, 1890 distinct=distinct, 1891 expressions=expressions, 1892 limit=limit, 1893 ) 1894 this.comments = comments 1895 1896 into = self._parse_into() 1897 if into: 1898 this.set("into", into) 1899 1900 from_ = self._parse_from() 1901 if from_: 1902 this.set("from", from_) 1903 1904 this = self._parse_query_modifiers(this) 1905 elif (table or nested) and self._match(TokenType.L_PAREN): 1906 if self._match(TokenType.PIVOT): 1907 this = self._parse_simplified_pivot() 1908 elif self._match(TokenType.FROM): 1909 this = exp.select("*").from_( 1910 t.cast(exp.From, self._parse_from(skip_from_token=True)) 1911 ) 1912 else: 1913 this = self._parse_table() if table else self._parse_select(nested=True) 1914 this = self._parse_set_operations(self._parse_query_modifiers(this)) 1915 1916 self._match_r_paren() 1917 1918 # early return so that subquery unions aren't parsed again 1919 # SELECT * FROM (SELECT 1) UNION ALL SELECT 1 1920 # Union ALL should be a property of the top select node, not the subquery 1921 return self._parse_subquery(this, parse_alias=parse_subquery_alias) 1922 elif self._match(TokenType.VALUES): 1923 this = self.expression( 1924 exp.Values, 1925 expressions=self._parse_csv(self._parse_value), 1926 alias=self._parse_table_alias(), 1927 ) 1928 else: 1929 this = None 1930 1931 return self._parse_set_operations(this) 1932 1933 def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]: 1934 if not skip_with_token and not self._match(TokenType.WITH): 1935 return None 1936 1937 comments = self._prev_comments 1938 recursive = self._match(TokenType.RECURSIVE) 1939 1940 expressions = [] 1941 while True: 1942 expressions.append(self._parse_cte()) 1943 1944 if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH): 1945 break 1946 else: 1947 self._match(TokenType.WITH) 1948 1949 return self.expression( 1950 exp.With, comments=comments, expressions=expressions, recursive=recursive 1951 ) 1952 1953 def _parse_cte(self) -> exp.CTE: 1954 alias = self._parse_table_alias() 1955 if not alias or not alias.this: 1956 self.raise_error("Expected CTE to have alias") 1957 

    def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]:
        if not skip_with_token and not self._match(TokenType.WITH):
            return None

        comments = self._prev_comments
        recursive = self._match(TokenType.RECURSIVE)

        expressions = []
        while True:
            expressions.append(self._parse_cte())

            if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH):
                break
            else:
                self._match(TokenType.WITH)

        return self.expression(
            exp.With, comments=comments, expressions=expressions, recursive=recursive
        )

    def _parse_cte(self) -> exp.CTE:
        alias = self._parse_table_alias()
        if not alias or not alias.this:
            self.raise_error("Expected CTE to have alias")

        self._match(TokenType.ALIAS)
        return self.expression(
            exp.CTE, this=self._parse_wrapped(self._parse_statement), alias=alias
        )

    def _parse_table_alias(
        self, alias_tokens: t.Optional[t.Collection[TokenType]] = None
    ) -> t.Optional[exp.TableAlias]:
        any_token = self._match(TokenType.ALIAS)
        alias = (
            self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
            or self._parse_string_as_identifier()
        )

        index = self._index
        if self._match(TokenType.L_PAREN):
            columns = self._parse_csv(self._parse_function_parameter)
            self._match_r_paren() if columns else self._retreat(index)
        else:
            columns = None

        if not alias and not columns:
            return None

        return self.expression(exp.TableAlias, this=alias, columns=columns)

    def _parse_subquery(
        self, this: t.Optional[exp.Expression], parse_alias: bool = True
    ) -> t.Optional[exp.Subquery]:
        if not this:
            return None

        return self.expression(
            exp.Subquery,
            this=this,
            pivots=self._parse_pivots(),
            alias=self._parse_table_alias() if parse_alias else None,
        )

    def _parse_query_modifiers(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        if isinstance(this, self.MODIFIABLES):
            for key, parser in self.QUERY_MODIFIER_PARSERS.items():
                expression = parser(self)

                if expression:
                    if key == "limit":
                        offset = expression.args.pop("offset", None)
                        if offset:
                            this.set("offset", exp.Offset(expression=offset))
                    this.set(key, expression)
        return this

    def _parse_hint(self) -> t.Optional[exp.Hint]:
        if self._match(TokenType.HINT):
            hints = self._parse_csv(self._parse_function)

            if not self._match_pair(TokenType.STAR, TokenType.SLASH):
                self.raise_error("Expected */ after HINT")

            return self.expression(exp.Hint, expressions=hints)

        return None

    def _parse_into(self) -> t.Optional[exp.Into]:
        if not self._match(TokenType.INTO):
            return None

        temp = self._match(TokenType.TEMPORARY)
        unlogged = self._match_text_seq("UNLOGGED")
        self._match(TokenType.TABLE)

        return self.expression(
            exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged
        )

    def _parse_from(
        self, modifiers: bool = False, skip_from_token: bool = False
    ) -> t.Optional[exp.From]:
        if not skip_from_token and not self._match(TokenType.FROM):
            return None

        comments = self._prev_comments
        this = self._parse_table()

        return self.expression(
            exp.From,
            comments=comments,
            this=self._parse_query_modifiers(this) if modifiers else this,
        )

    def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]:
        if not self._match(TokenType.MATCH_RECOGNIZE):
            return None

        self._match_l_paren()

        partition = self._parse_partition_by()
        order = self._parse_order()
        measures = (
            self._parse_csv(self._parse_expression) if self._match_text_seq("MEASURES") else None
        )

        if self._match_text_seq("ONE", "ROW", "PER", "MATCH"):
            rows = exp.var("ONE ROW PER MATCH")
        elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"):
            text = "ALL ROWS PER MATCH"
            if self._match_text_seq("SHOW", "EMPTY", "MATCHES"):
                text += " SHOW EMPTY MATCHES"
            elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"):
                text += " OMIT EMPTY MATCHES"
            elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"):
                text += " WITH UNMATCHED ROWS"
            rows = exp.var(text)
        else:
            rows = None

        if self._match_text_seq("AFTER", "MATCH", "SKIP"):
            text = "AFTER MATCH SKIP"
            if self._match_text_seq("PAST", "LAST", "ROW"):
                text += " PAST LAST ROW"
            elif self._match_text_seq("TO", "NEXT", "ROW"):
                text += " TO NEXT ROW"
            elif self._match_text_seq("TO", "FIRST"):
                text += f" TO FIRST {self._advance_any().text}"  # type: ignore
            elif self._match_text_seq("TO", "LAST"):
                text += f" TO LAST {self._advance_any().text}"  # type: ignore
            after = exp.var(text)
        else:
            after = None

        if self._match_text_seq("PATTERN"):
            self._match_l_paren()

            if not self._curr:
                self.raise_error("Expecting )", self._curr)

            paren = 1
            start = self._curr

            while self._curr and paren > 0:
                if self._curr.token_type == TokenType.L_PAREN:
                    paren += 1
                if self._curr.token_type == TokenType.R_PAREN:
                    paren -= 1

                end = self._prev
                self._advance()

            if paren > 0:
                self.raise_error("Expecting )", self._curr)

            pattern = exp.var(self._find_sql(start, end))
        else:
            pattern = None

        define = (
            self._parse_csv(
                lambda: self.expression(
                    exp.Alias,
                    alias=self._parse_id_var(any_token=True),
                    this=self._match(TokenType.ALIAS) and self._parse_conjunction(),
                )
            )
            if self._match_text_seq("DEFINE")
            else None
        )

        self._match_r_paren()

        return self.expression(
            exp.MatchRecognize,
            partition_by=partition,
            order=order,
            measures=measures,
            rows=rows,
            after=after,
            pattern=pattern,
            define=define,
            alias=self._parse_table_alias(),
        )

    def _parse_lateral(self) -> t.Optional[exp.Lateral]:
        outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY)
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY)

        if outer_apply or cross_apply:
            this = self._parse_select(table=True)
            view = None
            outer = not cross_apply
        elif self._match(TokenType.LATERAL):
            this = self._parse_select(table=True)
            view = self._match(TokenType.VIEW)
            outer = self._match(TokenType.OUTER)
        else:
            return None

        if not this:
            this = self._parse_function() or self._parse_id_var(any_token=False)
            while self._match(TokenType.DOT):
                this = exp.Dot(
                    this=this,
                    expression=self._parse_function() or self._parse_id_var(any_token=False),
                )

        if view:
            table = self._parse_id_var(any_token=False)
            columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else []
            table_alias: t.Optional[exp.TableAlias] = self.expression(
                exp.TableAlias, this=table, columns=columns
            )
        elif isinstance(this, exp.Subquery) and this.alias:
            # Ensures parity between the Subquery's and the Lateral's "alias" args
            table_alias = this.args["alias"].copy()
        else:
            table_alias = self._parse_table_alias()

        return self.expression(exp.Lateral, this=this, view=view, outer=outer, alias=table_alias)

    def _parse_join_parts(
        self,
    ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]:
        return (
            self._match_set(self.JOIN_METHODS) and self._prev,
            self._match_set(self.JOIN_SIDES) and self._prev,
            self._match_set(self.JOIN_KINDS) and self._prev,
        )

    def _parse_join(self, skip_join_token: bool = False) -> t.Optional[exp.Join]:
        if self._match(TokenType.COMMA):
            return self.expression(exp.Join, this=self._parse_table())

        index = self._index
        method, side, kind = self._parse_join_parts()
        hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None
        join = self._match(TokenType.JOIN)

        if not skip_join_token and not join:
            self._retreat(index)
            kind = None
            method = None
            side = None

        outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False)
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False)

        if not skip_join_token and not join and not outer_apply and not cross_apply:
            return None

        if outer_apply:
            side = Token(TokenType.LEFT, "LEFT")

        kwargs: t.Dict[str, t.Any] = {"this": self._parse_table()}

        if method:
            kwargs["method"] = method.text
        if side:
            kwargs["side"] = side.text
        if kind:
            kwargs["kind"] = kind.text
        if hint:
            kwargs["hint"] = hint

        if self._match(TokenType.ON):
            kwargs["on"] = self._parse_conjunction()
        elif self._match(TokenType.USING):
            kwargs["using"] = self._parse_wrapped_id_vars()

        return self.expression(exp.Join, **kwargs)

    def _parse_index(
        self,
        index: t.Optional[exp.Expression] = None,
    ) -> t.Optional[exp.Index]:
        if index:
            unique = None
            primary = None
            amp = None

            self._match(TokenType.ON)
            self._match(TokenType.TABLE)  # hive
            table = self._parse_table_parts(schema=True)
        else:
            unique = self._match(TokenType.UNIQUE)
            primary = self._match_text_seq("PRIMARY")
            amp = self._match_text_seq("AMP")

            if not self._match(TokenType.INDEX):
                return None

            index = self._parse_id_var()
            table = None

        using = self._parse_field() if self._match(TokenType.USING) else None

        if self._match(TokenType.L_PAREN, advance=False):
            columns = self._parse_wrapped_csv(self._parse_ordered)
        else:
            columns = None

        return self.expression(
            exp.Index,
            this=index,
            table=table,
            using=using,
            columns=columns,
            unique=unique,
            primary=primary,
            amp=amp,
            partition_by=self._parse_partition_by(),
        )

    def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]:
        return (
            (not schema and self._parse_function(optional_parens=False))
            or self._parse_id_var(any_token=False)
            or self._parse_string_as_identifier()
            or self._parse_placeholder()
        )

    def _parse_table_parts(self, schema: bool = False) -> exp.Table:
        catalog = None
        db = None
        table = self._parse_table_part(schema=schema)

        while self._match(TokenType.DOT):
            if catalog:
                # This allows nesting the table in arbitrarily many dot expressions if needed
                table = self.expression(
                    exp.Dot, this=table, expression=self._parse_table_part(schema=schema)
                )
            else:
                catalog = db
                db = table
                table = self._parse_table_part(schema=schema)

        if not table:
            self.raise_error(f"Expected table name but got {self._curr}")

        return self.expression(
            exp.Table, this=table, db=db, catalog=catalog, pivots=self._parse_pivots()
        )
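
    # NOTE (editor's illustration, not part of the original source):
    # _parse_table_parts assigns dotted parts right-to-left as table, db and
    # catalog, nesting anything deeper in exp.Dot. A sketch, assuming the
    # public API:
    #
    #   import sqlglot
    #   from sqlglot import exp
    #   table = sqlglot.parse_one("SELECT * FROM c.d.t").find(exp.Table)
    #   (table.catalog, table.db, table.name)  # ('c', 'd', 't')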

    def _parse_table(
        self, schema: bool = False, alias_tokens: t.Optional[t.Collection[TokenType]] = None
    ) -> t.Optional[exp.Expression]:
        lateral = self._parse_lateral()
        if lateral:
            return lateral

        unnest = self._parse_unnest()
        if unnest:
            return unnest

        values = self._parse_derived_table_values()
        if values:
            return values

        subquery = self._parse_select(table=True)
        if subquery:
            if not subquery.args.get("pivots"):
                subquery.set("pivots", self._parse_pivots())
            return subquery

        this: exp.Expression = self._parse_table_parts(schema=schema)

        if schema:
            return self._parse_schema(this=this)

        if self.ALIAS_POST_TABLESAMPLE:
            table_sample = self._parse_table_sample()

        alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
        if alias:
            this.set("alias", alias)

        if not this.args.get("pivots"):
            this.set("pivots", self._parse_pivots())

        if self._match_pair(TokenType.WITH, TokenType.L_PAREN):
            this.set(
                "hints",
                self._parse_csv(lambda: self._parse_function() or self._parse_var(any_token=True)),
            )
            self._match_r_paren()

        if not self.ALIAS_POST_TABLESAMPLE:
            table_sample = self._parse_table_sample()

        if table_sample:
            table_sample.set("this", this)
            this = table_sample

        return this

    def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]:
        if not self._match(TokenType.UNNEST):
            return None

        expressions = self._parse_wrapped_csv(self._parse_type)
        ordinality = self._match_pair(TokenType.WITH, TokenType.ORDINALITY)

        alias = self._parse_table_alias() if with_alias else None

        if alias and self.UNNEST_COLUMN_ONLY:
            if alias.args.get("columns"):
                self.raise_error("Unexpected extra column alias in unnest.")

            alias.set("columns", [alias.this])
            alias.set("this", None)

        offset = None
        if self._match_pair(TokenType.WITH, TokenType.OFFSET):
            self._match(TokenType.ALIAS)
            offset = self._parse_id_var() or exp.to_identifier("offset")

        return self.expression(
            exp.Unnest, expressions=expressions, ordinality=ordinality, alias=alias, offset=offset
        )

    def _parse_derived_table_values(self) -> t.Optional[exp.Values]:
        is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES)
        if not is_derived and not self._match(TokenType.VALUES):
            return None

        expressions = self._parse_csv(self._parse_value)
        alias = self._parse_table_alias()

        if is_derived:
            self._match_r_paren()

        return self.expression(
            exp.Values, expressions=expressions, alias=alias or self._parse_table_alias()
        )
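
    # NOTE (editor's illustration, not part of the original source): when a
    # dialect sets UNNEST_COLUMN_ONLY (BigQuery, for instance), the alias parsed
    # in _parse_unnest names the produced column rather than the derived table,
    # so it is moved from alias.this into alias.columns. Sketch, assuming the
    # public API:
    #
    #   import sqlglot
    #   from sqlglot import exp
    #   q = sqlglot.parse_one("SELECT * FROM UNNEST([1, 2]) AS x", read="bigquery")
    #   unnest = q.find(exp.Unnest)  # its alias carries columns=[x], no table name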

    def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]:
        if not self._match(TokenType.TABLE_SAMPLE) and not (
            as_modifier and self._match_text_seq("USING", "SAMPLE")
        ):
            return None

        bucket_numerator = None
        bucket_denominator = None
        bucket_field = None
        percent = None
        rows = None
        size = None
        seed = None

        kind = (
            self._prev.text if self._prev.token_type == TokenType.TABLE_SAMPLE else "USING SAMPLE"
        )
        method = self._parse_var(tokens=(TokenType.ROW,))

        self._match(TokenType.L_PAREN)

        num = self._parse_number()

        if self._match_text_seq("BUCKET"):
            bucket_numerator = self._parse_number()
            self._match_text_seq("OUT", "OF")
            bucket_denominator = self._parse_number()
            self._match(TokenType.ON)
            bucket_field = self._parse_field()
        elif self._match_set((TokenType.PERCENT, TokenType.MOD)):
            percent = num
        elif self._match(TokenType.ROWS):
            rows = num
        else:
            size = num

        self._match(TokenType.R_PAREN)

        if self._match(TokenType.L_PAREN):
            method = self._parse_var()
            seed = self._match(TokenType.COMMA) and self._parse_number()
            self._match_r_paren()
        elif self._match_texts(("SEED", "REPEATABLE")):
            seed = self._parse_wrapped(self._parse_number)

        return self.expression(
            exp.TableSample,
            method=method,
            bucket_numerator=bucket_numerator,
            bucket_denominator=bucket_denominator,
            bucket_field=bucket_field,
            percent=percent,
            rows=rows,
            size=size,
            seed=seed,
            kind=kind,
        )

    def _parse_pivots(self) -> t.List[t.Optional[exp.Expression]]:
        return list(iter(self._parse_pivot, None))

    # https://duckdb.org/docs/sql/statements/pivot
    def _parse_simplified_pivot(self) -> exp.Pivot:
        def _parse_on() -> t.Optional[exp.Expression]:
            this = self._parse_bitwise()
            return self._parse_in(this) if self._match(TokenType.IN) else this

        this = self._parse_table()
        expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on)
        using = self._match(TokenType.USING) and self._parse_csv(
            lambda: self._parse_alias(self._parse_function())
        )
        group = self._parse_group()
        return self.expression(
            exp.Pivot, this=this, expressions=expressions, using=using, group=group
        )

    def _parse_pivot(self) -> t.Optional[exp.Pivot]:
        index = self._index

        if self._match(TokenType.PIVOT):
            unpivot = False
        elif self._match(TokenType.UNPIVOT):
            unpivot = True
        else:
            return None

        expressions = []
        field = None

        if not self._match(TokenType.L_PAREN):
            self._retreat(index)
            return None

        if unpivot:
            expressions = self._parse_csv(self._parse_column)
        else:
            expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function()))

        if not expressions:
            self.raise_error("Failed to parse PIVOT's aggregation list")

        if not self._match(TokenType.FOR):
            self.raise_error("Expecting FOR")

        value = self._parse_column()

        if not self._match(TokenType.IN):
            self.raise_error("Expecting IN")

        field = self._parse_in(value, alias=True)

        self._match_r_paren()

        pivot = self.expression(exp.Pivot, expressions=expressions, field=field, unpivot=unpivot)

        if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False):
            pivot.set("alias", self._parse_table_alias())

        if not unpivot:
            names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions))

            columns: t.List[exp.Expression] = []
            for fld in pivot.args["field"].expressions:
                field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name
                for name in names:
                    if self.PREFIXED_PIVOT_COLUMNS:
                        name = f"{name}_{field_name}" if name else field_name
                    else:
                        name = f"{field_name}_{name}" if name else field_name

                    columns.append(exp.to_identifier(name))

            pivot.set("columns", columns)

        return pivot
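
    # NOTE (editor's illustration, not part of the original source): the naming
    # loop at the end of _parse_pivot combines each IN value with each aggregation
    # alias. For PIVOT(SUM(x) AS s FOR f IN ('a', 'b')) it would synthesize output
    # columns like a_s and b_s, or s_a and s_b when PREFIXED_PIVOT_COLUMNS is set.
    # This is a sketch of the naming rule, not dialect-verified output.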

    def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]:
        return [agg.alias for agg in aggregations]

    def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]:
        if not skip_where_token and not self._match(TokenType.WHERE):
            return None

        return self.expression(
            exp.Where, comments=self._prev_comments, this=self._parse_conjunction()
        )

    def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]:
        if not skip_group_by_token and not self._match(TokenType.GROUP_BY):
            return None

        elements = defaultdict(list)

        while True:
            expressions = self._parse_csv(self._parse_conjunction)
            if expressions:
                elements["expressions"].extend(expressions)

            grouping_sets = self._parse_grouping_sets()
            if grouping_sets:
                elements["grouping_sets"].extend(grouping_sets)

            rollup = None
            cube = None
            totals = None

            with_ = self._match(TokenType.WITH)
            if self._match(TokenType.ROLLUP):
                rollup = with_ or self._parse_wrapped_csv(self._parse_column)
                elements["rollup"].extend(ensure_list(rollup))

            if self._match(TokenType.CUBE):
                cube = with_ or self._parse_wrapped_csv(self._parse_column)
                elements["cube"].extend(ensure_list(cube))

            if self._match_text_seq("TOTALS"):
                totals = True
                elements["totals"] = True  # type: ignore

            if not (grouping_sets or rollup or cube or totals):
                break

        return self.expression(exp.Group, **elements)  # type: ignore

    def _parse_grouping_sets(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]:
        if not self._match(TokenType.GROUPING_SETS):
            return None

        return self._parse_wrapped_csv(self._parse_grouping_set)

    def _parse_grouping_set(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.L_PAREN):
            grouping_set = self._parse_csv(self._parse_column)
            self._match_r_paren()
            return self.expression(exp.Tuple, expressions=grouping_set)

        return self._parse_column()

    def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]:
        if not skip_having_token and not self._match(TokenType.HAVING):
            return None
        return self.expression(exp.Having, this=self._parse_conjunction())

    def _parse_qualify(self) -> t.Optional[exp.Qualify]:
        if not self._match(TokenType.QUALIFY):
            return None
        return self.expression(exp.Qualify, this=self._parse_conjunction())

    def _parse_order(
        self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False
    ) -> t.Optional[exp.Expression]:
        if not skip_order_token and not self._match(TokenType.ORDER_BY):
            return this

        return self.expression(
            exp.Order, this=this, expressions=self._parse_csv(self._parse_ordered)
        )

    def _parse_sort(self, exp_class: t.Type[E], *texts: str) -> t.Optional[E]:
        if not self._match_text_seq(*texts):
            return None
        return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered))

    def _parse_ordered(self) -> exp.Ordered:
        this = self._parse_conjunction()
        self._match(TokenType.ASC)

        is_desc = self._match(TokenType.DESC)
        is_nulls_first = self._match_text_seq("NULLS", "FIRST")
        is_nulls_last = self._match_text_seq("NULLS", "LAST")
        desc = is_desc or False
        asc = not desc
        nulls_first = is_nulls_first or False
        explicitly_null_ordered = is_nulls_first or is_nulls_last

        if (
            not explicitly_null_ordered
            and (
                (asc and self.NULL_ORDERING == "nulls_are_small")
                or (desc and self.NULL_ORDERING != "nulls_are_small")
            )
            and self.NULL_ORDERING != "nulls_are_last"
        ):
            nulls_first = True

        return self.expression(exp.Ordered, this=this, desc=desc, nulls_first=nulls_first)

    def _parse_limit(
        self, this: t.Optional[exp.Expression] = None, top: bool = False
    ) -> t.Optional[exp.Expression]:
        if self._match(TokenType.TOP if top else TokenType.LIMIT):
            limit_paren = self._match(TokenType.L_PAREN)
            expression = self._parse_number() if top else self._parse_term()

            if self._match(TokenType.COMMA):
                offset = expression
                expression = self._parse_term()
            else:
                offset = None

            limit_exp = self.expression(exp.Limit, this=this, expression=expression, offset=offset)

            if limit_paren:
                self._match_r_paren()

            return limit_exp

        if self._match(TokenType.FETCH):
            direction = self._match_set((TokenType.FIRST, TokenType.NEXT))
            direction = self._prev.text if direction else "FIRST"

            count = self._parse_number()
            percent = self._match(TokenType.PERCENT)

            self._match_set((TokenType.ROW, TokenType.ROWS))

            only = self._match_text_seq("ONLY")
            with_ties = self._match_text_seq("WITH", "TIES")

            if only and with_ties:
                self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause")

            return self.expression(
                exp.Fetch,
                direction=direction,
                count=count,
                percent=percent,
                with_ties=with_ties,
            )

        return this

    def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        if not self._match(TokenType.OFFSET):
            return this

        count = self._parse_number()
        self._match_set((TokenType.ROW, TokenType.ROWS))
        return self.expression(exp.Offset, this=this, expression=count)

    def _parse_locks(self) -> t.List[exp.Lock]:
        locks = []
        while True:
            if self._match_text_seq("FOR", "UPDATE"):
                update = True
            elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq(
                "LOCK", "IN", "SHARE", "MODE"
            ):
                update = False
            else:
                break

            expressions = None
            if self._match_text_seq("OF"):
                expressions = self._parse_csv(lambda: self._parse_table(schema=True))

            wait: t.Optional[bool | exp.Expression] = None
            if self._match_text_seq("NOWAIT"):
                wait = True
            elif self._match_text_seq("WAIT"):
                wait = self._parse_primary()
            elif self._match_text_seq("SKIP", "LOCKED"):
                wait = False

            locks.append(
                self.expression(exp.Lock, update=update, expressions=expressions, wait=wait)
            )

        return locks

    def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if not self._match_set(self.SET_OPERATIONS):
            return this

        token_type = self._prev.token_type

        if token_type == TokenType.UNION:
            expression = exp.Union
        elif token_type == TokenType.EXCEPT:
            expression = exp.Except
        else:
            expression = exp.Intersect

        return self.expression(
            expression,
            this=this,
            distinct=self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL),
            expression=self._parse_set_operations(self._parse_select(nested=True)),
        )
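
    # NOTE (editor's illustration, not part of the original source): in
    # _parse_set_operations, `distinct` is True for a bare UNION and only False
    # when ALL is given. Sketch, assuming the public API:
    #
    #   import sqlglot
    #   sqlglot.parse_one("SELECT 1 UNION SELECT 2").args["distinct"]      # True
    #   sqlglot.parse_one("SELECT 1 UNION ALL SELECT 2").args["distinct"]  # False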

    def _parse_expression(self) -> t.Optional[exp.Expression]:
        return self._parse_alias(self._parse_conjunction())

    def _parse_conjunction(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_equality, self.CONJUNCTION)

    def _parse_equality(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_comparison, self.EQUALITY)

    def _parse_comparison(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_range, self.COMPARISON)

    def _parse_range(self) -> t.Optional[exp.Expression]:
        this = self._parse_bitwise()
        negate = self._match(TokenType.NOT)

        if self._match_set(self.RANGE_PARSERS):
            expression = self.RANGE_PARSERS[self._prev.token_type](self, this)
            if not expression:
                return this

            this = expression
        elif self._match(TokenType.ISNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())

        # Postgres supports ISNULL and NOTNULL for conditions.
        # https://blog.andreiavram.ro/postgresql-null-composite-type/
        if self._match(TokenType.NOTNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())
            this = self.expression(exp.Not, this=this)

        if negate:
            this = self.expression(exp.Not, this=this)

        if self._match(TokenType.IS):
            this = self._parse_is(this)

        return this

    def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        index = self._index - 1
        negate = self._match(TokenType.NOT)

        if self._match_text_seq("DISTINCT", "FROM"):
            klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ
            return self.expression(klass, this=this, expression=self._parse_expression())

        expression = self._parse_null() or self._parse_boolean()
        if not expression:
            self._retreat(index)
            return None

        this = self.expression(exp.Is, this=this, expression=expression)
        return self.expression(exp.Not, this=this) if negate else this

    def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In:
        unnest = self._parse_unnest(with_alias=False)
        if unnest:
            this = self.expression(exp.In, this=this, unnest=unnest)
        elif self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias))

            if len(expressions) == 1 and isinstance(expressions[0], exp.Subqueryable):
                this = self.expression(exp.In, this=this, query=expressions[0])
            else:
                this = self.expression(exp.In, this=this, expressions=expressions)

            self._match_r_paren(this)
        else:
            this = self.expression(exp.In, this=this, field=self._parse_field())

        return this

    def _parse_between(self, this: exp.Expression) -> exp.Between:
        low = self._parse_bitwise()
        self._match(TokenType.AND)
        high = self._parse_bitwise()
        return self.expression(exp.Between, this=this, low=low, high=high)

    def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if not self._match(TokenType.ESCAPE):
            return this
        return self.expression(exp.Escape, this=this, expression=self._parse_string())

    def _parse_interval(self) -> t.Optional[exp.Interval]:
        if not self._match(TokenType.INTERVAL):
            return None

        this = self._parse_primary() or self._parse_term()
        unit = self._parse_function() or self._parse_var()

        # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse
        # each INTERVAL expression into this canonical form so it's easy to transpile
        if this and this.is_number:
            this = exp.Literal.string(this.name)
        elif this and this.is_string:
            parts = this.name.split()

            if len(parts) == 2:
                if unit:
                    # this is not actually a unit, it's something else
                    unit = None
                    self._retreat(self._index - 1)
                else:
                    this = exp.Literal.string(parts[0])
                    unit = self.expression(exp.Var, this=parts[1])

        return self.expression(exp.Interval, this=this, unit=unit)
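
    # NOTE (editor's illustration, not part of the original source): per the
    # comment in _parse_interval, a combined literal such as '5 day' is split
    # into a '5' literal plus a unit so that all dialects land on the
    # INTERVAL '5' day shape. Sketch, assuming the public API; output may vary
    # by version:
    #
    #   import sqlglot
    #   sqlglot.parse_one("SELECT INTERVAL '5 day'").sql()
    #   # e.g. "SELECT INTERVAL '5' day"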

    def _parse_bitwise(self) -> t.Optional[exp.Expression]:
        this = self._parse_term()

        while True:
            if self._match_set(self.BITWISE):
                this = self.expression(
                    self.BITWISE[self._prev.token_type], this=this, expression=self._parse_term()
                )
            elif self._match_pair(TokenType.LT, TokenType.LT):
                this = self.expression(
                    exp.BitwiseLeftShift, this=this, expression=self._parse_term()
                )
            elif self._match_pair(TokenType.GT, TokenType.GT):
                this = self.expression(
                    exp.BitwiseRightShift, this=this, expression=self._parse_term()
                )
            else:
                break

        return this

    def _parse_term(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_factor, self.TERM)

    def _parse_factor(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_unary, self.FACTOR)

    def _parse_unary(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.UNARY_PARSERS):
            return self.UNARY_PARSERS[self._prev.token_type](self)
        return self._parse_at_time_zone(self._parse_type())

    def _parse_type(self) -> t.Optional[exp.Expression]:
        interval = self._parse_interval()
        if interval:
            return interval

        index = self._index
        data_type = self._parse_types(check_func=True)
        this = self._parse_column()

        if data_type:
            if isinstance(this, exp.Literal):
                parser = self.TYPE_LITERAL_PARSERS.get(data_type.this)
                if parser:
                    return parser(self, this, data_type)
                return self.expression(exp.Cast, this=this, to=data_type)
            if not data_type.expressions:
                self._retreat(index)
                return self._parse_column()
            return self._parse_column_ops(data_type)

        return this

    def _parse_type_size(self) -> t.Optional[exp.DataTypeSize]:
        this = self._parse_type()
        if not this:
            return None

        return self.expression(
            exp.DataTypeSize, this=this, expression=self._parse_var(any_token=True)
        )
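
    # NOTE (editor's illustration, not part of the original source):
    # _parse_bitwise recognizes shifts as two adjacent < or > tokens, so `1 << 2`
    # becomes an exp.BitwiseLeftShift. Sketch, assuming the public API:
    #
    #   import sqlglot
    #   from sqlglot import exp
    #   node = sqlglot.parse_one("SELECT 1 << 2").expressions[0]
    #   assert isinstance(node, exp.BitwiseLeftShift)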

    def _parse_types(
        self, check_func: bool = False, schema: bool = False
    ) -> t.Optional[exp.Expression]:
        index = self._index

        prefix = self._match_text_seq("SYSUDTLIB", ".")

        if not self._match_set(self.TYPE_TOKENS):
            return None

        type_token = self._prev.token_type

        if type_token == TokenType.PSEUDO_TYPE:
            return self.expression(exp.PseudoType, this=self._prev.text)

        nested = type_token in self.NESTED_TYPE_TOKENS
        is_struct = type_token == TokenType.STRUCT
        expressions = None
        maybe_func = False

        if self._match(TokenType.L_PAREN):
            if is_struct:
                expressions = self._parse_csv(self._parse_struct_types)
            elif nested:
                expressions = self._parse_csv(
                    lambda: self._parse_types(check_func=check_func, schema=schema)
                )
            elif type_token in self.ENUM_TYPE_TOKENS:
                expressions = self._parse_csv(self._parse_primary)
            else:
                expressions = self._parse_csv(self._parse_type_size)

            if not expressions or not self._match(TokenType.R_PAREN):
                self._retreat(index)
                return None

            maybe_func = True

        if self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET):
            this = exp.DataType(
                this=exp.DataType.Type.ARRAY,
                expressions=[exp.DataType.build(type_token.value, expressions=expressions)],
                nested=True,
            )

            while self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET):
                this = exp.DataType(this=exp.DataType.Type.ARRAY, expressions=[this], nested=True)

            return this

        if self._match(TokenType.L_BRACKET):
            self._retreat(index)
            return None

        values: t.Optional[t.List[t.Optional[exp.Expression]]] = None
        if nested and self._match(TokenType.LT):
            if is_struct:
                expressions = self._parse_csv(self._parse_struct_types)
            else:
                expressions = self._parse_csv(
                    lambda: self._parse_types(check_func=check_func, schema=schema)
                )

            if not self._match(TokenType.GT):
                self.raise_error("Expecting >")

            if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)):
                values = self._parse_csv(self._parse_conjunction)
                self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN))

        value: t.Optional[exp.Expression] = None
        if type_token in self.TIMESTAMPS:
            if self._match_text_seq("WITH", "TIME", "ZONE") or type_token == TokenType.TIMESTAMPTZ:
                value = exp.DataType(this=exp.DataType.Type.TIMESTAMPTZ, expressions=expressions)
            elif (
                self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE")
                or type_token == TokenType.TIMESTAMPLTZ
            ):
                value = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions)
            elif self._match_text_seq("WITHOUT", "TIME", "ZONE"):
                if type_token == TokenType.TIME:
                    value = exp.DataType(this=exp.DataType.Type.TIME, expressions=expressions)
                else:
                    value = exp.DataType(this=exp.DataType.Type.TIMESTAMP, expressions=expressions)

            maybe_func = maybe_func and value is None

            if value is None:
                value = exp.DataType(this=exp.DataType.Type.TIMESTAMP, expressions=expressions)
        elif type_token == TokenType.INTERVAL:
            unit = self._parse_var()

            if not unit:
                value = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL)
            else:
                value = self.expression(exp.Interval, unit=unit)

        if maybe_func and check_func:
            index2 = self._index
            peek = self._parse_string()

            if not peek:
                self._retreat(index)
                return None

            self._retreat(index2)

        if value:
            return value

        return exp.DataType(
            this=exp.DataType.Type[type_token.value.upper()],
            expressions=expressions,
            nested=nested,
            values=values,
            prefix=prefix,
        )

    def _parse_struct_types(self) -> t.Optional[exp.Expression]:
        this = self._parse_type() or self._parse_id_var()
        self._match(TokenType.COLON)
        return self._parse_column_def(this)

    def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if not self._match_text_seq("AT", "TIME", "ZONE"):
            return this
        return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary())
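
    # NOTE (editor's illustration, not part of the original source): via
    # _parse_type and _parse_types, a type keyword followed by a string literal
    # behaves like a cast (or a dialect-specific TYPE_LITERAL_PARSERS entry).
    # Sketch, assuming the public API:
    #
    #   import sqlglot
    #   sqlglot.parse_one("SELECT DATE '2020-01-01'").sql()
    #   # e.g. "SELECT CAST('2020-01-01' AS DATE)"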

    def _parse_column(self) -> t.Optional[exp.Expression]:
        this = self._parse_field()
        if isinstance(this, exp.Identifier):
            this = self.expression(exp.Column, this=this)
        elif not this:
            return self._parse_bracket(this)
        return self._parse_column_ops(this)

    def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        this = self._parse_bracket(this)

        while self._match_set(self.COLUMN_OPERATORS):
            op_token = self._prev.token_type
            op = self.COLUMN_OPERATORS.get(op_token)

            if op_token == TokenType.DCOLON:
                field = self._parse_types()
                if not field:
                    self.raise_error("Expected type")
            elif op and self._curr:
                self._advance()
                value = self._prev.text
                field = (
                    exp.Literal.number(value)
                    if self._prev.token_type == TokenType.NUMBER
                    else exp.Literal.string(value)
                )
            else:
                field = self._parse_field(anonymous_func=True, any_token=True)

            if isinstance(field, exp.Func):
                # bigquery allows function calls like x.y.count(...)
                # SAFE.SUBSTR(...)
                # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules
                this = self._replace_columns_with_dots(this)

            if op:
                this = op(self, this, field)
            elif isinstance(this, exp.Column) and not this.args.get("catalog"):
                this = self.expression(
                    exp.Column,
                    this=field,
                    table=this.this,
                    db=this.args.get("table"),
                    catalog=this.args.get("db"),
                )
            else:
                this = self.expression(exp.Dot, this=this, expression=field)

            this = self._parse_bracket(this)
        return this

    def _parse_primary(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.PRIMARY_PARSERS):
            token_type = self._prev.token_type
            primary = self.PRIMARY_PARSERS[token_type](self, self._prev)

            if token_type == TokenType.STRING:
                expressions = [primary]
                while self._match(TokenType.STRING):
                    expressions.append(exp.Literal.string(self._prev.text))

                if len(expressions) > 1:
                    return self.expression(exp.Concat, expressions=expressions)

            return primary

        if self._match_pair(TokenType.DOT, TokenType.NUMBER):
            return exp.Literal.number(f"0.{self._prev.text}")

        if self._match(TokenType.L_PAREN):
            comments = self._prev_comments
            query = self._parse_select()

            if query:
                expressions = [query]
            else:
                expressions = self._parse_csv(self._parse_expression)

            this = self._parse_query_modifiers(seq_get(expressions, 0))

            if isinstance(this, exp.Subqueryable):
                this = self._parse_set_operations(
                    self._parse_subquery(this=this, parse_alias=False)
                )
            elif len(expressions) > 1:
                this = self.expression(exp.Tuple, expressions=expressions)
            else:
                this = self.expression(exp.Paren, this=self._parse_set_operations(this))

            if this:
                this.add_comments(comments)

            self._match_r_paren(expression=this)
            return this

        return None

    def _parse_field(
        self,
        any_token: bool = False,
        tokens: t.Optional[t.Collection[TokenType]] = None,
        anonymous_func: bool = False,
    ) -> t.Optional[exp.Expression]:
        return (
            self._parse_primary()
            or self._parse_function(anonymous=anonymous_func)
            or self._parse_id_var(any_token=any_token, tokens=tokens)
        )
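
    # NOTE (editor's illustration, not part of the original source): as the
    # BigQuery comment in _parse_column_ops above notes, a function call may hang
    # off a dotted path (e.g. SAFE.SUBSTR(...)); in that case the accumulated
    # column parts are rewritten into exp.Dot nodes via _replace_columns_with_dots
    # instead of being treated as a catalog.db.table column reference.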

    def _parse_function(
        self,
        functions: t.Optional[t.Dict[str, t.Callable]] = None,
        anonymous: bool = False,
        optional_parens: bool = True,
    ) -> t.Optional[exp.Expression]:
        if not self._curr:
            return None

        token_type = self._curr.token_type

        if optional_parens and self._match_set(self.NO_PAREN_FUNCTION_PARSERS):
            return self.NO_PAREN_FUNCTION_PARSERS[token_type](self)

        if not self._next or self._next.token_type != TokenType.L_PAREN:
            if optional_parens and token_type in self.NO_PAREN_FUNCTIONS:
                self._advance()
                return self.expression(self.NO_PAREN_FUNCTIONS[token_type])

            return None

        if token_type not in self.FUNC_TOKENS:
            return None

        this = self._curr.text
        upper = this.upper()
        self._advance(2)

        parser = self.FUNCTION_PARSERS.get(upper)

        if parser and not anonymous:
            this = parser(self)
        else:
            subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type)

            if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH):
                this = self.expression(subquery_predicate, this=self._parse_select())
                self._match_r_paren()
                return this

            if functions is None:
                functions = self.FUNCTIONS

            function = functions.get(upper)

            alias = upper in self.FUNCTIONS_WITH_ALIASED_ARGS
            args = self._parse_csv(lambda: self._parse_lambda(alias=alias))

            if function and not anonymous:
                this = self.validate_expression(function(args), args)
            else:
                this = self.expression(exp.Anonymous, this=this, expressions=args)

        self._match_r_paren(this)
        return self._parse_window(this)

    def _parse_function_parameter(self) -> t.Optional[exp.Expression]:
        return self._parse_column_def(self._parse_id_var())

    def _parse_user_defined_function(
        self, kind: t.Optional[TokenType] = None
    ) -> t.Optional[exp.Expression]:
        this = self._parse_id_var()

        while self._match(TokenType.DOT):
            this = self.expression(exp.Dot, this=this, expression=self._parse_id_var())

        if not self._match(TokenType.L_PAREN):
            return this

        expressions = self._parse_csv(self._parse_function_parameter)
        self._match_r_paren()
        return self.expression(
            exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True
        )

    def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier:
        literal = self._parse_primary()
        if literal:
            return self.expression(exp.Introducer, this=token.text, expression=literal)

        return self.expression(exp.Identifier, this=token.text)

    def _parse_session_parameter(self) -> exp.SessionParameter:
        kind = None
        this = self._parse_id_var() or self._parse_primary()

        if this and self._match(TokenType.DOT):
            kind = this.name
            this = self._parse_var() or self._parse_primary()

        return self.expression(exp.SessionParameter, this=this, kind=kind)
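
    # NOTE (editor's illustration, not part of the original source): in
    # _parse_function, a name missing from FUNCTIONS falls through to
    # exp.Anonymous rather than a typed function node. Sketch, assuming the
    # public API:
    #
    #   import sqlglot
    #   from sqlglot import exp
    #   call = sqlglot.parse_one("SELECT MY_UDF(1)").expressions[0]
    #   assert isinstance(call, exp.Anonymous)  # call.this == 'MY_UDF'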

    def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]:
        index = self._index

        if self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(self._parse_id_var)

            if not self._match(TokenType.R_PAREN):
                self._retreat(index)
        else:
            expressions = [self._parse_id_var()]

        if self._match_set(self.LAMBDAS):
            return self.LAMBDAS[self._prev.token_type](self, expressions)

        self._retreat(index)

        this: t.Optional[exp.Expression]

        if self._match(TokenType.DISTINCT):
            this = self.expression(
                exp.Distinct, expressions=self._parse_csv(self._parse_conjunction)
            )
        else:
            this = self._parse_select_or_expression(alias=alias)

        if isinstance(this, exp.EQ):
            left = this.this
            if isinstance(left, exp.Column):
                left.replace(exp.var(left.text("this")))

        return self._parse_limit(self._parse_order(self._parse_respect_or_ignore_nulls(this)))

    def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        index = self._index

        if not self.errors:
            try:
                if self._parse_select(nested=True):
                    return this
            except ParseError:
                pass
            finally:
                self.errors.clear()
                self._retreat(index)

        if not self._match(TokenType.L_PAREN):
            return this

        args = self._parse_csv(
            lambda: self._parse_constraint()
            or self._parse_column_def(self._parse_field(any_token=True))
        )

        self._match_r_paren()
        return self.expression(exp.Schema, this=this, expressions=args)

    def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        # column defs are not really columns, they're identifiers
        if isinstance(this, exp.Column):
            this = this.this

        kind = self._parse_types(schema=True)

        if self._match_text_seq("FOR", "ORDINALITY"):
            return self.expression(exp.ColumnDef, this=this, ordinality=True)

        constraints = []
        while True:
            constraint = self._parse_column_constraint()
            if not constraint:
                break
            constraints.append(constraint)

        if not kind and not constraints:
            return this

        return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints)

    def _parse_auto_increment(
        self,
    ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint:
        start = None
        increment = None

        if self._match(TokenType.L_PAREN, advance=False):
            args = self._parse_wrapped_csv(self._parse_bitwise)
            start = seq_get(args, 0)
            increment = seq_get(args, 1)
        elif self._match_text_seq("START"):
            start = self._parse_bitwise()
            self._match_text_seq("INCREMENT")
            increment = self._parse_bitwise()

        if start and increment:
            return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment)

        return exp.AutoIncrementColumnConstraint()

    def _parse_compress(self) -> exp.CompressColumnConstraint:
        if self._match(TokenType.L_PAREN, advance=False):
            return self.expression(
                exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise)
            )

        return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise())
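
    # NOTE (editor's illustration, not part of the original source):
    # _parse_column_def pairs an identifier with an optional type and a run of
    # constraints. Sketch, assuming the public API:
    #
    #   import sqlglot
    #   from sqlglot import exp
    #   ddl = sqlglot.parse_one("CREATE TABLE t (a INT NOT NULL)")
    #   col = ddl.find(exp.ColumnDef)  # kind=INT, one NOT NULL constraint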

    def _parse_generated_as_identity(self) -> exp.GeneratedAsIdentityColumnConstraint:
        if self._match_text_seq("BY", "DEFAULT"):
            on_null = self._match_pair(TokenType.ON, TokenType.NULL)
            this = self.expression(
                exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null
            )
        else:
            self._match_text_seq("ALWAYS")
            this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True)

        self._match(TokenType.ALIAS)
        identity = self._match_text_seq("IDENTITY")

        if self._match(TokenType.L_PAREN):
            if self._match_text_seq("START", "WITH"):
                this.set("start", self._parse_bitwise())
            if self._match_text_seq("INCREMENT", "BY"):
                this.set("increment", self._parse_bitwise())
            if self._match_text_seq("MINVALUE"):
                this.set("minvalue", self._parse_bitwise())
            if self._match_text_seq("MAXVALUE"):
                this.set("maxvalue", self._parse_bitwise())

            if self._match_text_seq("CYCLE"):
                this.set("cycle", True)
            elif self._match_text_seq("NO", "CYCLE"):
                this.set("cycle", False)

            if not identity:
                this.set("expression", self._parse_bitwise())

            self._match_r_paren()

        return this

    def _parse_inline(self) -> exp.InlineLengthColumnConstraint:
        self._match_text_seq("LENGTH")
        return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise())

    def _parse_not_constraint(
        self,
    ) -> t.Optional[exp.NotNullColumnConstraint | exp.CaseSpecificColumnConstraint]:
        if self._match_text_seq("NULL"):
            return self.expression(exp.NotNullColumnConstraint)
        if self._match_text_seq("CASESPECIFIC"):
            return self.expression(exp.CaseSpecificColumnConstraint, not_=True)
        return None

    def _parse_column_constraint(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.CONSTRAINT):
            this = self._parse_id_var()
        else:
            this = None

        if self._match_texts(self.CONSTRAINT_PARSERS):
            return self.expression(
                exp.ColumnConstraint,
                this=this,
                kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self),
            )

        return this

    def _parse_constraint(self) -> t.Optional[exp.Expression]:
        if not self._match(TokenType.CONSTRAINT):
            return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS)

        this = self._parse_id_var()
        expressions = []

        while True:
            constraint = self._parse_unnamed_constraint() or self._parse_function()
            if not constraint:
                break
            expressions.append(constraint)

        return self.expression(exp.Constraint, this=this, expressions=expressions)

    def _parse_unnamed_constraint(
        self, constraints: t.Optional[t.Collection[str]] = None
    ) -> t.Optional[exp.Expression]:
        if not self._match_texts(constraints or self.CONSTRAINT_PARSERS):
            return None

        constraint = self._prev.text.upper()
        if constraint not in self.CONSTRAINT_PARSERS:
            self.raise_error(f"No parser found for schema constraint {constraint}.")

        return self.CONSTRAINT_PARSERS[constraint](self)

    def _parse_unique(self) -> exp.UniqueColumnConstraint:
        self._match_text_seq("KEY")
        return self.expression(
            exp.UniqueColumnConstraint, this=self._parse_schema(self._parse_id_var(any_token=False))
        )
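
    # NOTE (editor's illustration, not part of the original source): in
    # _parse_generated_as_identity, BY DEFAULT maps to this=False and ALWAYS to
    # this=True, with START WITH / INCREMENT BY / MINVALUE / MAXVALUE / CYCLE
    # captured as args. So, per the branches above, a column such as
    # "id INT GENERATED BY DEFAULT AS IDENTITY (START WITH 1 INCREMENT BY 2)"
    # yields a constraint with this=False, start=1 and increment=2.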
options.append("MATCH FULL") 3481 else: 3482 break 3483 3484 return options 3485 3486 def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]: 3487 if match and not self._match(TokenType.REFERENCES): 3488 return None 3489 3490 expressions = None 3491 this = self._parse_id_var() 3492 3493 if self._match(TokenType.L_PAREN, advance=False): 3494 expressions = self._parse_wrapped_id_vars() 3495 3496 options = self._parse_key_constraint_options() 3497 return self.expression(exp.Reference, this=this, expressions=expressions, options=options) 3498 3499 def _parse_foreign_key(self) -> exp.ForeignKey: 3500 expressions = self._parse_wrapped_id_vars() 3501 reference = self._parse_references() 3502 options = {} 3503 3504 while self._match(TokenType.ON): 3505 if not self._match_set((TokenType.DELETE, TokenType.UPDATE)): 3506 self.raise_error("Expected DELETE or UPDATE") 3507 3508 kind = self._prev.text.lower() 3509 3510 if self._match_text_seq("NO", "ACTION"): 3511 action = "NO ACTION" 3512 elif self._match(TokenType.SET): 3513 self._match_set((TokenType.NULL, TokenType.DEFAULT)) 3514 action = "SET " + self._prev.text.upper() 3515 else: 3516 self._advance() 3517 action = self._prev.text.upper() 3518 3519 options[kind] = action 3520 3521 return self.expression( 3522 exp.ForeignKey, expressions=expressions, reference=reference, **options # type: ignore 3523 ) 3524 3525 def _parse_primary_key( 3526 self, wrapped_optional: bool = False, in_props: bool = False 3527 ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey: 3528 desc = ( 3529 self._match_set((TokenType.ASC, TokenType.DESC)) 3530 and self._prev.token_type == TokenType.DESC 3531 ) 3532 3533 if not in_props and not self._match(TokenType.L_PAREN, advance=False): 3534 return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc) 3535 3536 expressions = self._parse_wrapped_csv(self._parse_field, optional=wrapped_optional) 3537 options = self._parse_key_constraint_options() 3538 return self.expression(exp.PrimaryKey, expressions=expressions, options=options) 3539 3540 def _parse_bracket(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3541 if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)): 3542 return this 3543 3544 bracket_kind = self._prev.token_type 3545 3546 if self._match(TokenType.COLON): 3547 expressions: t.List[t.Optional[exp.Expression]] = [ 3548 self.expression(exp.Slice, expression=self._parse_conjunction()) 3549 ] 3550 else: 3551 expressions = self._parse_csv(lambda: self._parse_slice(self._parse_conjunction())) 3552 3553 # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs 3554 if bracket_kind == TokenType.L_BRACE: 3555 this = self.expression(exp.Struct, expressions=expressions) 3556 elif not this or this.name.upper() == "ARRAY": 3557 this = self.expression(exp.Array, expressions=expressions) 3558 else: 3559 expressions = apply_index_offset(this, expressions, -self.INDEX_OFFSET) 3560 this = self.expression(exp.Bracket, this=this, expressions=expressions) 3561 3562 if not self._match(TokenType.R_BRACKET) and bracket_kind == TokenType.L_BRACKET: 3563 self.raise_error("Expected ]") 3564 elif not self._match(TokenType.R_BRACE) and bracket_kind == TokenType.L_BRACE: 3565 self.raise_error("Expected }") 3566 3567 self._add_comments(this) 3568 return self._parse_bracket(this) 3569 3570 def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3571 if self._match(TokenType.COLON): 3572 return self.expression(exp.Slice, this=this, 
expression=self._parse_conjunction()) 3573 return this 3574 3575 def _parse_case(self) -> t.Optional[exp.Expression]: 3576 ifs = [] 3577 default = None 3578 3579 expression = self._parse_conjunction() 3580 3581 while self._match(TokenType.WHEN): 3582 this = self._parse_conjunction() 3583 self._match(TokenType.THEN) 3584 then = self._parse_conjunction() 3585 ifs.append(self.expression(exp.If, this=this, true=then)) 3586 3587 if self._match(TokenType.ELSE): 3588 default = self._parse_conjunction() 3589 3590 if not self._match(TokenType.END): 3591 self.raise_error("Expected END after CASE", self._prev) 3592 3593 return self._parse_window( 3594 self.expression(exp.Case, this=expression, ifs=ifs, default=default) 3595 ) 3596 3597 def _parse_if(self) -> t.Optional[exp.Expression]: 3598 if self._match(TokenType.L_PAREN): 3599 args = self._parse_csv(self._parse_conjunction) 3600 this = self.validate_expression(exp.If.from_arg_list(args), args) 3601 self._match_r_paren() 3602 else: 3603 index = self._index - 1 3604 condition = self._parse_conjunction() 3605 3606 if not condition: 3607 self._retreat(index) 3608 return None 3609 3610 self._match(TokenType.THEN) 3611 true = self._parse_conjunction() 3612 false = self._parse_conjunction() if self._match(TokenType.ELSE) else None 3613 self._match(TokenType.END) 3614 this = self.expression(exp.If, this=condition, true=true, false=false) 3615 3616 return self._parse_window(this) 3617 3618 def _parse_extract(self) -> exp.Extract: 3619 this = self._parse_function() or self._parse_var() or self._parse_type() 3620 3621 if self._match(TokenType.FROM): 3622 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 3623 3624 if not self._match(TokenType.COMMA): 3625 self.raise_error("Expected FROM or comma after EXTRACT", self._prev) 3626 3627 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 3628 3629 def _parse_cast(self, strict: bool) -> exp.Expression: 3630 this = self._parse_conjunction() 3631 3632 if not self._match(TokenType.ALIAS): 3633 if self._match(TokenType.COMMA): 3634 return self.expression( 3635 exp.CastToStrType, this=this, expression=self._parse_string() 3636 ) 3637 else: 3638 self.raise_error("Expected AS after CAST") 3639 3640 to = self._parse_types() 3641 3642 if not to: 3643 self.raise_error("Expected TYPE after CAST") 3644 elif to.this == exp.DataType.Type.CHAR: 3645 if self._match(TokenType.CHARACTER_SET): 3646 to = self.expression(exp.CharacterSet, this=self._parse_var_or_string()) 3647 elif to.this in exp.DataType.TEMPORAL_TYPES and self._match(TokenType.FORMAT): 3648 fmt = self._parse_string() 3649 3650 return self.expression( 3651 exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime, 3652 this=this, 3653 format=exp.Literal.string( 3654 format_time( 3655 fmt.this if fmt else "", 3656 self.FORMAT_MAPPING or self.TIME_MAPPING, 3657 self.FORMAT_TRIE or self.TIME_TRIE, 3658 ) 3659 ), 3660 ) 3661 3662 return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to) 3663 3664 def _parse_concat(self) -> t.Optional[exp.Expression]: 3665 args = self._parse_csv(self._parse_conjunction) 3666 if self.CONCAT_NULL_OUTPUTS_STRING: 3667 args = [exp.func("COALESCE", arg, exp.Literal.string("")) for arg in args] 3668 3669 # Some dialects (e.g. Trino) don't allow a single-argument CONCAT call, so when 3670 # we find such a call we replace it with its argument. 
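# (e.g. Trino rejects CONCAT('a'), so collapsing such a call to its lone argument keeps the parsed tree transpilable)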
3671 if len(args) == 1: 3672 return args[0] 3673 3674 return self.expression( 3675 exp.Concat if self.STRICT_STRING_CONCAT else exp.SafeConcat, expressions=args 3676 ) 3677 3678 def _parse_string_agg(self) -> exp.Expression: 3679 expression: t.Optional[exp.Expression] 3680 3681 if self._match(TokenType.DISTINCT): 3682 args = self._parse_csv(self._parse_conjunction) 3683 expression = self.expression(exp.Distinct, expressions=[seq_get(args, 0)]) 3684 else: 3685 args = self._parse_csv(self._parse_conjunction) 3686 expression = seq_get(args, 0) 3687 3688 index = self._index 3689 if not self._match(TokenType.R_PAREN): 3690 # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]]) 3691 order = self._parse_order(this=expression) 3692 return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1)) 3693 3694 # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]). 3695 # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that 3696 # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them. 3697 if not self._match_text_seq("WITHIN", "GROUP"): 3698 self._retreat(index) 3699 return self.validate_expression(exp.GroupConcat.from_arg_list(args), args) 3700 3701 self._match_l_paren() # The corresponding match_r_paren will be called in parse_function (caller) 3702 order = self._parse_order(this=expression) 3703 return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1)) 3704 3705 def _parse_convert(self, strict: bool) -> t.Optional[exp.Expression]: 3706 to: t.Optional[exp.Expression] 3707 this = self._parse_bitwise() 3708 3709 if self._match(TokenType.USING): 3710 to = self.expression(exp.CharacterSet, this=self._parse_var()) 3711 elif self._match(TokenType.COMMA): 3712 to = self._parse_bitwise() 3713 else: 3714 to = None 3715 3716 # Swap the argument order if needed to produce the correct AST 3717 if self.CONVERT_TYPE_FIRST: 3718 this, to = to, this 3719 3720 return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to) 3721 3722 def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]: 3723 """ 3724 There are generally two variants of the DECODE function: 3725 3726 - DECODE(bin, charset) 3727 - DECODE(expression, search, result [, search, result] ... [, default]) 3728 3729 The second variant will always be parsed into a CASE expression. Note that NULL 3730 needs special treatment, since we need to explicitly check for it with `IS NULL`, 3731 instead of relying on pattern matching. 
3732 """ 3733 args = self._parse_csv(self._parse_conjunction) 3734 3735 if len(args) < 3: 3736 return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1)) 3737 3738 expression, *expressions = args 3739 if not expression: 3740 return None 3741 3742 ifs = [] 3743 for search, result in zip(expressions[::2], expressions[1::2]): 3744 if not search or not result: 3745 return None 3746 3747 if isinstance(search, exp.Literal): 3748 ifs.append( 3749 exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result) 3750 ) 3751 elif isinstance(search, exp.Null): 3752 ifs.append( 3753 exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result) 3754 ) 3755 else: 3756 cond = exp.or_( 3757 exp.EQ(this=expression.copy(), expression=search), 3758 exp.and_( 3759 exp.Is(this=expression.copy(), expression=exp.Null()), 3760 exp.Is(this=search.copy(), expression=exp.Null()), 3761 copy=False, 3762 ), 3763 copy=False, 3764 ) 3765 ifs.append(exp.If(this=cond, true=result)) 3766 3767 return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None) 3768 3769 def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]: 3770 self._match_text_seq("KEY") 3771 key = self._parse_field() 3772 self._match(TokenType.COLON) 3773 self._match_text_seq("VALUE") 3774 value = self._parse_field() 3775 3776 if not key and not value: 3777 return None 3778 return self.expression(exp.JSONKeyValue, this=key, expression=value) 3779 3780 def _parse_json_object(self) -> exp.JSONObject: 3781 star = self._parse_star() 3782 expressions = [star] if star else self._parse_csv(self._parse_json_key_value) 3783 3784 null_handling = None 3785 if self._match_text_seq("NULL", "ON", "NULL"): 3786 null_handling = "NULL ON NULL" 3787 elif self._match_text_seq("ABSENT", "ON", "NULL"): 3788 null_handling = "ABSENT ON NULL" 3789 3790 unique_keys = None 3791 if self._match_text_seq("WITH", "UNIQUE"): 3792 unique_keys = True 3793 elif self._match_text_seq("WITHOUT", "UNIQUE"): 3794 unique_keys = False 3795 3796 self._match_text_seq("KEYS") 3797 3798 return_type = self._match_text_seq("RETURNING") and self._parse_type() 3799 format_json = self._match_text_seq("FORMAT", "JSON") 3800 encoding = self._match_text_seq("ENCODING") and self._parse_var() 3801 3802 return self.expression( 3803 exp.JSONObject, 3804 expressions=expressions, 3805 null_handling=null_handling, 3806 unique_keys=unique_keys, 3807 return_type=return_type, 3808 format_json=format_json, 3809 encoding=encoding, 3810 ) 3811 3812 def _parse_logarithm(self) -> exp.Func: 3813 # Default argument order is base, expression 3814 args = self._parse_csv(self._parse_range) 3815 3816 if len(args) > 1: 3817 if not self.LOG_BASE_FIRST: 3818 args.reverse() 3819 return exp.Log.from_arg_list(args) 3820 3821 return self.expression( 3822 exp.Ln if self.LOG_DEFAULTS_TO_LN else exp.Log, this=seq_get(args, 0) 3823 ) 3824 3825 def _parse_match_against(self) -> exp.MatchAgainst: 3826 expressions = self._parse_csv(self._parse_column) 3827 3828 self._match_text_seq(")", "AGAINST", "(") 3829 3830 this = self._parse_string() 3831 3832 if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"): 3833 modifier = "IN NATURAL LANGUAGE MODE" 3834 if self._match_text_seq("WITH", "QUERY", "EXPANSION"): 3835 modifier = f"{modifier} WITH QUERY EXPANSION" 3836 elif self._match_text_seq("IN", "BOOLEAN", "MODE"): 3837 modifier = "IN BOOLEAN MODE" 3838 elif self._match_text_seq("WITH", "QUERY", "EXPANSION"): 3839 modifier = "WITH QUERY EXPANSION" 3840 
else: 3841 modifier = None 3842 3843 return self.expression( 3844 exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier 3845 ) 3846 3847 # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16 3848 def _parse_open_json(self) -> exp.OpenJSON: 3849 this = self._parse_bitwise() 3850 path = self._match(TokenType.COMMA) and self._parse_string() 3851 3852 def _parse_open_json_column_def() -> exp.OpenJSONColumnDef: 3853 this = self._parse_field(any_token=True) 3854 kind = self._parse_types() 3855 path = self._parse_string() 3856 as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON) 3857 3858 return self.expression( 3859 exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json 3860 ) 3861 3862 expressions = None 3863 if self._match_pair(TokenType.R_PAREN, TokenType.WITH): 3864 self._match_l_paren() 3865 expressions = self._parse_csv(_parse_open_json_column_def) 3866 3867 return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions) 3868 3869 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 3870 args = self._parse_csv(self._parse_bitwise) 3871 3872 if self._match(TokenType.IN): 3873 return self.expression( 3874 exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0) 3875 ) 3876 3877 if haystack_first: 3878 haystack = seq_get(args, 0) 3879 needle = seq_get(args, 1) 3880 else: 3881 needle = seq_get(args, 0) 3882 haystack = seq_get(args, 1) 3883 3884 return self.expression( 3885 exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2) 3886 ) 3887 3888 def _parse_join_hint(self, func_name: str) -> exp.JoinHint: 3889 args = self._parse_csv(self._parse_table) 3890 return exp.JoinHint(this=func_name.upper(), expressions=args) 3891 3892 def _parse_substring(self) -> exp.Substring: 3893 # Postgres supports the form: substring(string [from int] [for int]) 3894 # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6 3895 3896 args = self._parse_csv(self._parse_bitwise) 3897 3898 if self._match(TokenType.FROM): 3899 args.append(self._parse_bitwise()) 3900 if self._match(TokenType.FOR): 3901 args.append(self._parse_bitwise()) 3902 3903 return self.validate_expression(exp.Substring.from_arg_list(args), args) 3904 3905 def _parse_trim(self) -> exp.Trim: 3906 # https://www.w3resource.com/sql/character-functions/trim.php 3907 # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html 3908 3909 position = None 3910 collation = None 3911 3912 if self._match_texts(self.TRIM_TYPES): 3913 position = self._prev.text.upper() 3914 3915 expression = self._parse_bitwise() 3916 if self._match_set((TokenType.FROM, TokenType.COMMA)): 3917 this = self._parse_bitwise() 3918 else: 3919 this = expression 3920 expression = None 3921 3922 if self._match(TokenType.COLLATE): 3923 collation = self._parse_bitwise() 3924 3925 return self.expression( 3926 exp.Trim, this=this, position=position, expression=expression, collation=collation 3927 ) 3928 3929 def _parse_window_clause(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]: 3930 return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window) 3931 3932 def _parse_named_window(self) -> t.Optional[exp.Expression]: 3933 return self._parse_window(self._parse_id_var(), alias=True) 3934 3935 def _parse_respect_or_ignore_nulls( 3936 self, this: t.Optional[exp.Expression] 3937 ) -> t.Optional[exp.Expression]: 3938 if self._match_text_seq("IGNORE", "NULLS"): 3939 return 
self.expression(exp.IgnoreNulls, this=this) 3940 if self._match_text_seq("RESPECT", "NULLS"): 3941 return self.expression(exp.RespectNulls, this=this) 3942 return this 3943 3944 def _parse_window( 3945 self, this: t.Optional[exp.Expression], alias: bool = False 3946 ) -> t.Optional[exp.Expression]: 3947 if self._match_pair(TokenType.FILTER, TokenType.L_PAREN): 3948 this = self.expression(exp.Filter, this=this, expression=self._parse_where()) 3949 self._match_r_paren() 3950 3951 # T-SQL allows the OVER (...) syntax after WITHIN GROUP. 3952 # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16 3953 if self._match_text_seq("WITHIN", "GROUP"): 3954 order = self._parse_wrapped(self._parse_order) 3955 this = self.expression(exp.WithinGroup, this=this, expression=order) 3956 3957 # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER 3958 # Some dialects choose to implement and some do not. 3959 # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html 3960 3961 # There is some code above in _parse_lambda that handles 3962 # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ... 3963 3964 # The below changes handle 3965 # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ... 3966 3967 # Oracle allows both formats 3968 # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html) 3969 # and Snowflake chose to do the same for familiarity 3970 # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes 3971 this = self._parse_respect_or_ignore_nulls(this) 3972 3973 # bigquery select from window x AS (partition by ...) 3974 if alias: 3975 over = None 3976 self._match(TokenType.ALIAS) 3977 elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS): 3978 return this 3979 else: 3980 over = self._prev.text.upper() 3981 3982 if not self._match(TokenType.L_PAREN): 3983 return self.expression( 3984 exp.Window, this=this, alias=self._parse_id_var(False), over=over 3985 ) 3986 3987 window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS) 3988 3989 first = self._match(TokenType.FIRST) 3990 if self._match_text_seq("LAST"): 3991 first = False 3992 3993 partition = self._parse_partition_by() 3994 order = self._parse_order() 3995 kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text 3996 3997 if kind: 3998 self._match(TokenType.BETWEEN) 3999 start = self._parse_window_spec() 4000 self._match(TokenType.AND) 4001 end = self._parse_window_spec() 4002 4003 spec = self.expression( 4004 exp.WindowSpec, 4005 kind=kind, 4006 start=start["value"], 4007 start_side=start["side"], 4008 end=end["value"], 4009 end_side=end["side"], 4010 ) 4011 else: 4012 spec = None 4013 4014 self._match_r_paren() 4015 4016 return self.expression( 4017 exp.Window, 4018 this=this, 4019 partition_by=partition, 4020 order=order, 4021 spec=spec, 4022 alias=window_alias, 4023 over=over, 4024 first=first, 4025 ) 4026 4027 def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]: 4028 self._match(TokenType.BETWEEN) 4029 4030 return { 4031 "value": ( 4032 (self._match_text_seq("UNBOUNDED") and "UNBOUNDED") 4033 or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW") 4034 or self._parse_bitwise() 4035 ), 4036 "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text, 4037 } 4038 4039 def _parse_alias( 4040 self, this: t.Optional[exp.Expression], explicit: bool = False 4041 ) -> t.Optional[exp.Expression]: 4042 
any_token = self._match(TokenType.ALIAS) 4043 4044 if explicit and not any_token: 4045 return this 4046 4047 if self._match(TokenType.L_PAREN): 4048 aliases = self.expression( 4049 exp.Aliases, 4050 this=this, 4051 expressions=self._parse_csv(lambda: self._parse_id_var(any_token)), 4052 ) 4053 self._match_r_paren(aliases) 4054 return aliases 4055 4056 alias = self._parse_id_var(any_token) 4057 4058 if alias: 4059 return self.expression(exp.Alias, this=this, alias=alias) 4060 4061 return this 4062 4063 def _parse_id_var( 4064 self, 4065 any_token: bool = True, 4066 tokens: t.Optional[t.Collection[TokenType]] = None, 4067 ) -> t.Optional[exp.Expression]: 4068 identifier = self._parse_identifier() 4069 4070 if identifier: 4071 return identifier 4072 4073 if (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS): 4074 quoted = self._prev.token_type == TokenType.STRING 4075 return exp.Identifier(this=self._prev.text, quoted=quoted) 4076 4077 return None 4078 4079 def _parse_string(self) -> t.Optional[exp.Expression]: 4080 if self._match(TokenType.STRING): 4081 return self.PRIMARY_PARSERS[TokenType.STRING](self, self._prev) 4082 return self._parse_placeholder() 4083 4084 def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]: 4085 return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True) 4086 4087 def _parse_number(self) -> t.Optional[exp.Expression]: 4088 if self._match(TokenType.NUMBER): 4089 return self.PRIMARY_PARSERS[TokenType.NUMBER](self, self._prev) 4090 return self._parse_placeholder() 4091 4092 def _parse_identifier(self) -> t.Optional[exp.Expression]: 4093 if self._match(TokenType.IDENTIFIER): 4094 return self.expression(exp.Identifier, this=self._prev.text, quoted=True) 4095 return self._parse_placeholder() 4096 4097 def _parse_var( 4098 self, any_token: bool = False, tokens: t.Optional[t.Collection[TokenType]] = None 4099 ) -> t.Optional[exp.Expression]: 4100 if ( 4101 (any_token and self._advance_any()) 4102 or self._match(TokenType.VAR) 4103 or (self._match_set(tokens) if tokens else False) 4104 ): 4105 return self.expression(exp.Var, this=self._prev.text) 4106 return self._parse_placeholder() 4107 4108 def _advance_any(self) -> t.Optional[Token]: 4109 if self._curr and self._curr.token_type not in self.RESERVED_KEYWORDS: 4110 self._advance() 4111 return self._prev 4112 return None 4113 4114 def _parse_var_or_string(self) -> t.Optional[exp.Expression]: 4115 return self._parse_var() or self._parse_string() 4116 4117 def _parse_null(self) -> t.Optional[exp.Expression]: 4118 if self._match(TokenType.NULL): 4119 return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev) 4120 return None 4121 4122 def _parse_boolean(self) -> t.Optional[exp.Expression]: 4123 if self._match(TokenType.TRUE): 4124 return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev) 4125 if self._match(TokenType.FALSE): 4126 return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev) 4127 return None 4128 4129 def _parse_star(self) -> t.Optional[exp.Expression]: 4130 if self._match(TokenType.STAR): 4131 return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev) 4132 return None 4133 4134 def _parse_parameter(self) -> exp.Parameter: 4135 wrapped = self._match(TokenType.L_BRACE) 4136 this = self._parse_var() or self._parse_identifier() or self._parse_primary() 4137 self._match(TokenType.R_BRACE) 4138 return self.expression(exp.Parameter, this=this, wrapped=wrapped) 4139 4140 def _parse_placeholder(self) -> t.Optional[exp.Expression]: 
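# Dispatch goes through PLACEHOLDER_PARSERS (e.g. "?" placeholders and ":name" bind parameters); when the sub-parser yields nothing, the token is rewound so callers can try other rules.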
4141 if self._match_set(self.PLACEHOLDER_PARSERS): 4142 placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self) 4143 if placeholder: 4144 return placeholder 4145 self._advance(-1) 4146 return None 4147 4148 def _parse_except(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]: 4149 if not self._match(TokenType.EXCEPT): 4150 return None 4151 if self._match(TokenType.L_PAREN, advance=False): 4152 return self._parse_wrapped_csv(self._parse_column) 4153 return self._parse_csv(self._parse_column) 4154 4155 def _parse_replace(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]: 4156 if not self._match(TokenType.REPLACE): 4157 return None 4158 if self._match(TokenType.L_PAREN, advance=False): 4159 return self._parse_wrapped_csv(self._parse_expression) 4160 return self._parse_csv(self._parse_expression) 4161 4162 def _parse_csv( 4163 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA 4164 ) -> t.List[t.Optional[exp.Expression]]: 4165 parse_result = parse_method() 4166 items = [parse_result] if parse_result is not None else [] 4167 4168 while self._match(sep): 4169 self._add_comments(parse_result) 4170 parse_result = parse_method() 4171 if parse_result is not None: 4172 items.append(parse_result) 4173 4174 return items 4175 4176 def _parse_tokens( 4177 self, parse_method: t.Callable, expressions: t.Dict 4178 ) -> t.Optional[exp.Expression]: 4179 this = parse_method() 4180 4181 while self._match_set(expressions): 4182 this = self.expression( 4183 expressions[self._prev.token_type], 4184 this=this, 4185 comments=self._prev_comments, 4186 expression=parse_method(), 4187 ) 4188 4189 return this 4190 4191 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[t.Optional[exp.Expression]]: 4192 return self._parse_wrapped_csv(self._parse_id_var, optional=optional) 4193 4194 def _parse_wrapped_csv( 4195 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False 4196 ) -> t.List[t.Optional[exp.Expression]]: 4197 return self._parse_wrapped( 4198 lambda: self._parse_csv(parse_method, sep=sep), optional=optional 4199 ) 4200 4201 def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any: 4202 wrapped = self._match(TokenType.L_PAREN) 4203 if not wrapped and not optional: 4204 self.raise_error("Expecting (") 4205 parse_result = parse_method() 4206 if wrapped: 4207 self._match_r_paren() 4208 return parse_result 4209 4210 def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]: 4211 return self._parse_select() or self._parse_set_operations( 4212 self._parse_expression() if alias else self._parse_conjunction() 4213 ) 4214 4215 def _parse_ddl_select(self) -> t.Optional[exp.Expression]: 4216 return self._parse_query_modifiers( 4217 self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False)) 4218 ) 4219 4220 def _parse_transaction(self) -> exp.Transaction: 4221 this = None 4222 if self._match_texts(self.TRANSACTION_KIND): 4223 this = self._prev.text 4224 4225 self._match_texts({"TRANSACTION", "WORK"}) 4226 4227 modes = [] 4228 while True: 4229 mode = [] 4230 while self._match(TokenType.VAR): 4231 mode.append(self._prev.text) 4232 4233 if mode: 4234 modes.append(" ".join(mode)) 4235 if not self._match(TokenType.COMMA): 4236 break 4237 4238 return self.expression(exp.Transaction, this=this, modes=modes) 4239 4240 def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback: 4241 chain = None 4242 savepoint = None 4243 is_rollback = self._prev.token_type == 
TokenType.ROLLBACK 4244 4245 self._match_texts({"TRANSACTION", "WORK"}) 4246 4247 if self._match_text_seq("TO"): 4248 self._match_text_seq("SAVEPOINT") 4249 savepoint = self._parse_id_var() 4250 4251 if self._match(TokenType.AND): 4252 chain = not self._match_text_seq("NO") 4253 self._match_text_seq("CHAIN") 4254 4255 if is_rollback: 4256 return self.expression(exp.Rollback, savepoint=savepoint) 4257 4258 return self.expression(exp.Commit, chain=chain) 4259 4260 def _parse_add_column(self) -> t.Optional[exp.Expression]: 4261 if not self._match_text_seq("ADD"): 4262 return None 4263 4264 self._match(TokenType.COLUMN) 4265 exists_column = self._parse_exists(not_=True) 4266 expression = self._parse_column_def(self._parse_field(any_token=True)) 4267 4268 if expression: 4269 expression.set("exists", exists_column) 4270 4271 # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns 4272 if self._match_texts(("FIRST", "AFTER")): 4273 position = self._prev.text 4274 column_position = self.expression( 4275 exp.ColumnPosition, this=self._parse_column(), position=position 4276 ) 4277 expression.set("position", column_position) 4278 4279 return expression 4280 4281 def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]: 4282 drop = self._match(TokenType.DROP) and self._parse_drop() 4283 if drop and not isinstance(drop, exp.Command): 4284 drop.set("kind", drop.args.get("kind", "COLUMN")) 4285 return drop 4286 4287 # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html 4288 def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition: 4289 return self.expression( 4290 exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists 4291 ) 4292 4293 def _parse_add_constraint(self) -> exp.AddConstraint: 4294 this = None 4295 kind = self._prev.token_type 4296 4297 if kind == TokenType.CONSTRAINT: 4298 this = self._parse_id_var() 4299 4300 if self._match_text_seq("CHECK"): 4301 expression = self._parse_wrapped(self._parse_conjunction) 4302 enforced = self._match_text_seq("ENFORCED") 4303 4304 return self.expression( 4305 exp.AddConstraint, this=this, expression=expression, enforced=enforced 4306 ) 4307 4308 if kind == TokenType.FOREIGN_KEY or self._match(TokenType.FOREIGN_KEY): 4309 expression = self._parse_foreign_key() 4310 elif kind == TokenType.PRIMARY_KEY or self._match(TokenType.PRIMARY_KEY): 4311 expression = self._parse_primary_key() 4312 else: 4313 expression = None 4314 4315 return self.expression(exp.AddConstraint, this=this, expression=expression) 4316 4317 def _parse_alter_table_add(self) -> t.List[t.Optional[exp.Expression]]: 4318 index = self._index - 1 4319 4320 if self._match_set(self.ADD_CONSTRAINT_TOKENS): 4321 return self._parse_csv(self._parse_add_constraint) 4322 4323 self._retreat(index) 4324 return self._parse_csv(self._parse_add_column) 4325 4326 def _parse_alter_table_alter(self) -> exp.AlterColumn: 4327 self._match(TokenType.COLUMN) 4328 column = self._parse_field(any_token=True) 4329 4330 if self._match_pair(TokenType.DROP, TokenType.DEFAULT): 4331 return self.expression(exp.AlterColumn, this=column, drop=True) 4332 if self._match_pair(TokenType.SET, TokenType.DEFAULT): 4333 return self.expression(exp.AlterColumn, this=column, default=self._parse_conjunction()) 4334 4335 self._match_text_seq("SET", "DATA") 4336 return self.expression( 4337 exp.AlterColumn, 4338 this=column, 4339 dtype=self._match_text_seq("TYPE") and self._parse_types(), 4340 
collate=self._match(TokenType.COLLATE) and self._parse_term(), 4341 using=self._match(TokenType.USING) and self._parse_conjunction(), 4342 ) 4343 4344 def _parse_alter_table_drop(self) -> t.List[t.Optional[exp.Expression]]: 4345 index = self._index - 1 4346 4347 partition_exists = self._parse_exists() 4348 if self._match(TokenType.PARTITION, advance=False): 4349 return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists)) 4350 4351 self._retreat(index) 4352 return self._parse_csv(self._parse_drop_column) 4353 4354 def _parse_alter_table_rename(self) -> exp.RenameTable: 4355 self._match_text_seq("TO") 4356 return self.expression(exp.RenameTable, this=self._parse_table(schema=True)) 4357 4358 def _parse_alter(self) -> exp.AlterTable | exp.Command: 4359 start = self._prev 4360 4361 if not self._match(TokenType.TABLE): 4362 return self._parse_as_command(start) 4363 4364 exists = self._parse_exists() 4365 this = self._parse_table(schema=True) 4366 4367 if self._next: 4368 self._advance() 4369 parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None 4370 4371 if parser: 4372 actions = ensure_list(parser(self)) 4373 4374 if not self._curr: 4375 return self.expression( 4376 exp.AlterTable, 4377 this=this, 4378 exists=exists, 4379 actions=actions, 4380 ) 4381 return self._parse_as_command(start) 4382 4383 def _parse_merge(self) -> exp.Merge: 4384 self._match(TokenType.INTO) 4385 target = self._parse_table() 4386 4387 self._match(TokenType.USING) 4388 using = self._parse_table() 4389 4390 self._match(TokenType.ON) 4391 on = self._parse_conjunction() 4392 4393 whens = [] 4394 while self._match(TokenType.WHEN): 4395 matched = not self._match(TokenType.NOT) 4396 self._match_text_seq("MATCHED") 4397 source = ( 4398 False 4399 if self._match_text_seq("BY", "TARGET") 4400 else self._match_text_seq("BY", "SOURCE") 4401 ) 4402 condition = self._parse_conjunction() if self._match(TokenType.AND) else None 4403 4404 self._match(TokenType.THEN) 4405 4406 if self._match(TokenType.INSERT): 4407 _this = self._parse_star() 4408 if _this: 4409 then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=_this) 4410 else: 4411 then = self.expression( 4412 exp.Insert, 4413 this=self._parse_value(), 4414 expression=self._match(TokenType.VALUES) and self._parse_value(), 4415 ) 4416 elif self._match(TokenType.UPDATE): 4417 expressions = self._parse_star() 4418 if expressions: 4419 then = self.expression(exp.Update, expressions=expressions) 4420 else: 4421 then = self.expression( 4422 exp.Update, 4423 expressions=self._match(TokenType.SET) 4424 and self._parse_csv(self._parse_equality), 4425 ) 4426 elif self._match(TokenType.DELETE): 4427 then = self.expression(exp.Var, this=self._prev.text) 4428 else: 4429 then = None 4430 4431 whens.append( 4432 self.expression( 4433 exp.When, 4434 matched=matched, 4435 source=source, 4436 condition=condition, 4437 then=then, 4438 ) 4439 ) 4440 4441 return self.expression( 4442 exp.Merge, 4443 this=target, 4444 using=using, 4445 on=on, 4446 expressions=whens, 4447 ) 4448 4449 def _parse_show(self) -> t.Optional[exp.Expression]: 4450 parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE) 4451 if parser: 4452 return parser(self) 4453 self._advance() 4454 return self.expression(exp.Show, this=self._prev.text.upper()) 4455 4456 def _parse_set_item_assignment( 4457 self, kind: t.Optional[str] = None 4458 ) -> t.Optional[exp.Expression]: 4459 index = self._index 4460 4461 if kind in {"GLOBAL", "SESSION"} and 
self._match_text_seq("TRANSACTION"): 4462 return self._parse_set_transaction(global_=kind == "GLOBAL") 4463 4464 left = self._parse_primary() or self._parse_id_var() 4465 4466 if not self._match_texts(("=", "TO")): 4467 self._retreat(index) 4468 return None 4469 4470 right = self._parse_statement() or self._parse_id_var() 4471 this = self.expression(exp.EQ, this=left, expression=right) 4472 4473 return self.expression(exp.SetItem, this=this, kind=kind) 4474 4475 def _parse_set_transaction(self, global_: bool = False) -> exp.Expression: 4476 self._match_text_seq("TRANSACTION") 4477 characteristics = self._parse_csv( 4478 lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS) 4479 ) 4480 return self.expression( 4481 exp.SetItem, 4482 expressions=characteristics, 4483 kind="TRANSACTION", 4484 **{"global": global_}, # type: ignore 4485 ) 4486 4487 def _parse_set_item(self) -> t.Optional[exp.Expression]: 4488 parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE) 4489 return parser(self) if parser else self._parse_set_item_assignment(kind=None) 4490 4491 def _parse_set(self) -> exp.Set | exp.Command: 4492 index = self._index 4493 set_ = self.expression(exp.Set, expressions=self._parse_csv(self._parse_set_item)) 4494 4495 if self._curr: 4496 self._retreat(index) 4497 return self._parse_as_command(self._prev) 4498 4499 return set_ 4500 4501 def _parse_var_from_options(self, options: t.Collection[str]) -> t.Optional[exp.Var]: 4502 for option in options: 4503 if self._match_text_seq(*option.split(" ")): 4504 return exp.var(option) 4505 return None 4506 4507 def _parse_as_command(self, start: Token) -> exp.Command: 4508 while self._curr: 4509 self._advance() 4510 text = self._find_sql(start, self._prev) 4511 size = len(start.text) 4512 return exp.Command(this=text[:size], expression=text[size:]) 4513 4514 def _parse_dict_property(self, this: str) -> exp.DictProperty: 4515 settings = [] 4516 4517 self._match_l_paren() 4518 kind = self._parse_id_var() 4519 4520 if self._match(TokenType.L_PAREN): 4521 while True: 4522 key = self._parse_id_var() 4523 value = self._parse_primary() 4524 4525 if not key and value is None: 4526 break 4527 settings.append(self.expression(exp.DictSubProperty, this=key, value=value)) 4528 self._match(TokenType.R_PAREN) 4529 4530 self._match_r_paren() 4531 4532 return self.expression( 4533 exp.DictProperty, 4534 this=this, 4535 kind=kind.this if kind else None, 4536 settings=settings, 4537 ) 4538 4539 def _parse_dict_range(self, this: str) -> exp.DictRange: 4540 self._match_l_paren() 4541 has_min = self._match_text_seq("MIN") 4542 if has_min: 4543 min = self._parse_var() or self._parse_primary() 4544 self._match_text_seq("MAX") 4545 max = self._parse_var() or self._parse_primary() 4546 else: 4547 max = self._parse_var() or self._parse_primary() 4548 min = exp.Literal.number(0) 4549 self._match_r_paren() 4550 return self.expression(exp.DictRange, this=this, min=min, max=max) 4551 4552 def _find_parser( 4553 self, parsers: t.Dict[str, t.Callable], trie: t.Dict 4554 ) -> t.Optional[t.Callable]: 4555 if not self._curr: 4556 return None 4557 4558 index = self._index 4559 this = [] 4560 while True: 4561 # The current token might be multiple words 4562 curr = self._curr.text.upper() 4563 key = curr.split(" ") 4564 this.append(curr) 4565 self._advance() 4566 result, trie = in_trie(trie, key) 4567 if result == 0: 4568 break 4569 if result == 2: 4570 subparser = parsers[" ".join(this)] 4571 return subparser 4572 self._retreat(index) 4573 return None 4574 4575 def 
_match(self, token_type, advance=True, expression=None): 4576 if not self._curr: 4577 return None 4578 4579 if self._curr.token_type == token_type: 4580 if advance: 4581 self._advance() 4582 self._add_comments(expression) 4583 return True 4584 4585 return None 4586 4587 def _match_set(self, types, advance=True): 4588 if not self._curr: 4589 return None 4590 4591 if self._curr.token_type in types: 4592 if advance: 4593 self._advance() 4594 return True 4595 4596 return None 4597 4598 def _match_pair(self, token_type_a, token_type_b, advance=True): 4599 if not self._curr or not self._next: 4600 return None 4601 4602 if self._curr.token_type == token_type_a and self._next.token_type == token_type_b: 4603 if advance: 4604 self._advance(2) 4605 return True 4606 4607 return None 4608 4609 def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 4610 if not self._match(TokenType.L_PAREN, expression=expression): 4611 self.raise_error("Expecting (") 4612 4613 def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 4614 if not self._match(TokenType.R_PAREN, expression=expression): 4615 self.raise_error("Expecting )") 4616 4617 def _match_texts(self, texts, advance=True): 4618 if self._curr and self._curr.text.upper() in texts: 4619 if advance: 4620 self._advance() 4621 return True 4622 return False 4623 4624 def _match_text_seq(self, *texts, advance=True): 4625 index = self._index 4626 for text in texts: 4627 if self._curr and self._curr.text.upper() == text: 4628 self._advance() 4629 else: 4630 self._retreat(index) 4631 return False 4632 4633 if not advance: 4634 self._retreat(index) 4635 4636 return True 4637 4638 @t.overload 4639 def _replace_columns_with_dots(self, this: exp.Expression) -> exp.Expression: 4640 ... 4641 4642 @t.overload 4643 def _replace_columns_with_dots( 4644 self, this: t.Optional[exp.Expression] 4645 ) -> t.Optional[exp.Expression]: 4646 ... 4647 4648 def _replace_columns_with_dots(self, this): 4649 if isinstance(this, exp.Dot): 4650 exp.replace_children(this, self._replace_columns_with_dots) 4651 elif isinstance(this, exp.Column): 4652 exp.replace_children(this, self._replace_columns_with_dots) 4653 table = this.args.get("table") 4654 this = ( 4655 self.expression(exp.Dot, this=table, expression=this.this) 4656 if table 4657 else self.expression(exp.Var, this=this.name) 4658 ) 4659 elif isinstance(this, exp.Identifier): 4660 this = self.expression(exp.Var, this=this.name) 4661 4662 return this 4663 4664 def _replace_lambda( 4665 self, node: t.Optional[exp.Expression], lambda_variables: t.Set[str] 4666 ) -> t.Optional[exp.Expression]: 4667 if not node: 4668 return node 4669 4670 for column in node.find_all(exp.Column): 4671 if column.parts[0].name in lambda_variables: 4672 dot_or_id = column.to_dot() if column.table else column.this 4673 parent = column.parent 4674 4675 while isinstance(parent, exp.Dot): 4676 if not isinstance(parent.parent, exp.Dot): 4677 parent.replace(dot_or_id) 4678 break 4679 parent = parent.parent 4680 else: 4681 if column is node: 4682 node = dot_or_id 4683 else: 4684 column.replace(dot_or_id) 4685 return node
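As a rough orientation for the private helpers above, here is a minimal sketch of how
they are reached through the public API. The rendered SQL strings in the comments are
indicative only, and the snippet assumes sqlglot is importable as shown:

    import sqlglot
    from sqlglot.parser import Parser
    from sqlglot.tokens import Tokenizer

    # DECODE(expr, search, result, ..., default) is normalized into a CASE
    # expression by _parse_decode, so it can be transpiled to dialects that
    # have no DECODE function.
    ast = sqlglot.parse_one("SELECT DECODE(x, 1, 'one', 2, 'two', 'other') FROM t")
    print(ast.sql())
    # e.g. SELECT CASE WHEN x = 1 THEN 'one' WHEN x = 2 THEN 'two' ELSE 'other' END FROM t

    # The parser can also be driven manually with the tokenizer's output;
    # _parse_trim handles the TRIM([LEADING | TRAILING | BOTH] ... FROM ...) form.
    tokens = Tokenizer().tokenize("SELECT TRIM(BOTH 'x' FROM y)")
    expression = Parser().parse(tokens)[0]
    print(expression.sql())
    # e.g. SELECT TRIM(BOTH 'x' FROM y)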
exp.Any, 194 TokenType.ALL: exp.All, 195 TokenType.EXISTS: exp.Exists, 196 TokenType.SOME: exp.Any, 197 } 198 199 RESERVED_KEYWORDS = { 200 *Tokenizer.SINGLE_TOKENS.values(), 201 TokenType.SELECT, 202 } 203 204 DB_CREATABLES = { 205 TokenType.DATABASE, 206 TokenType.SCHEMA, 207 TokenType.TABLE, 208 TokenType.VIEW, 209 TokenType.DICTIONARY, 210 } 211 212 CREATABLES = { 213 TokenType.COLUMN, 214 TokenType.FUNCTION, 215 TokenType.INDEX, 216 TokenType.PROCEDURE, 217 *DB_CREATABLES, 218 } 219 220 # Tokens that can represent identifiers 221 ID_VAR_TOKENS = { 222 TokenType.VAR, 223 TokenType.ANTI, 224 TokenType.APPLY, 225 TokenType.ASC, 226 TokenType.AUTO_INCREMENT, 227 TokenType.BEGIN, 228 TokenType.CACHE, 229 TokenType.CASE, 230 TokenType.COLLATE, 231 TokenType.COMMAND, 232 TokenType.COMMENT, 233 TokenType.COMMIT, 234 TokenType.CONSTRAINT, 235 TokenType.DEFAULT, 236 TokenType.DELETE, 237 TokenType.DESC, 238 TokenType.DESCRIBE, 239 TokenType.DICTIONARY, 240 TokenType.DIV, 241 TokenType.END, 242 TokenType.EXECUTE, 243 TokenType.ESCAPE, 244 TokenType.FALSE, 245 TokenType.FIRST, 246 TokenType.FILTER, 247 TokenType.FORMAT, 248 TokenType.FULL, 249 TokenType.IF, 250 TokenType.IS, 251 TokenType.ISNULL, 252 TokenType.INTERVAL, 253 TokenType.KEEP, 254 TokenType.LEFT, 255 TokenType.LOAD, 256 TokenType.MERGE, 257 TokenType.NATURAL, 258 TokenType.NEXT, 259 TokenType.OFFSET, 260 TokenType.ORDINALITY, 261 TokenType.OVERWRITE, 262 TokenType.PARTITION, 263 TokenType.PERCENT, 264 TokenType.PIVOT, 265 TokenType.PRAGMA, 266 TokenType.RANGE, 267 TokenType.REFERENCES, 268 TokenType.RIGHT, 269 TokenType.ROW, 270 TokenType.ROWS, 271 TokenType.SEMI, 272 TokenType.SET, 273 TokenType.SETTINGS, 274 TokenType.SHOW, 275 TokenType.TEMPORARY, 276 TokenType.TOP, 277 TokenType.TRUE, 278 TokenType.UNIQUE, 279 TokenType.UNPIVOT, 280 TokenType.UPDATE, 281 TokenType.VOLATILE, 282 TokenType.WINDOW, 283 *CREATABLES, 284 *SUBQUERY_PREDICATES, 285 *TYPE_TOKENS, 286 *NO_PAREN_FUNCTIONS, 287 } 288 289 INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END} 290 291 TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - { 292 TokenType.APPLY, 293 TokenType.ASOF, 294 TokenType.FULL, 295 TokenType.LEFT, 296 TokenType.LOCK, 297 TokenType.NATURAL, 298 TokenType.OFFSET, 299 TokenType.RIGHT, 300 TokenType.WINDOW, 301 } 302 303 COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS} 304 305 UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET} 306 307 TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"} 308 309 FUNC_TOKENS = { 310 TokenType.COMMAND, 311 TokenType.CURRENT_DATE, 312 TokenType.CURRENT_DATETIME, 313 TokenType.CURRENT_TIMESTAMP, 314 TokenType.CURRENT_TIME, 315 TokenType.CURRENT_USER, 316 TokenType.FILTER, 317 TokenType.FIRST, 318 TokenType.FORMAT, 319 TokenType.GLOB, 320 TokenType.IDENTIFIER, 321 TokenType.INDEX, 322 TokenType.ISNULL, 323 TokenType.ILIKE, 324 TokenType.LIKE, 325 TokenType.MERGE, 326 TokenType.OFFSET, 327 TokenType.PRIMARY_KEY, 328 TokenType.RANGE, 329 TokenType.REPLACE, 330 TokenType.ROW, 331 TokenType.UNNEST, 332 TokenType.VAR, 333 TokenType.LEFT, 334 TokenType.RIGHT, 335 TokenType.DATE, 336 TokenType.DATETIME, 337 TokenType.TABLE, 338 TokenType.TIMESTAMP, 339 TokenType.TIMESTAMPTZ, 340 TokenType.WINDOW, 341 *TYPE_TOKENS, 342 *SUBQUERY_PREDICATES, 343 } 344 345 CONJUNCTION = { 346 TokenType.AND: exp.And, 347 TokenType.OR: exp.Or, 348 } 349 350 EQUALITY = { 351 TokenType.EQ: exp.EQ, 352 TokenType.NEQ: exp.NEQ, 353 TokenType.NULLSAFE_EQ: exp.NullSafeEQ, 354 } 355 356 COMPARISON = { 357 TokenType.GT: exp.GT, 358 TokenType.GTE: exp.GTE, 359 
TokenType.LT: exp.LT, 360 TokenType.LTE: exp.LTE, 361 } 362 363 BITWISE = { 364 TokenType.AMP: exp.BitwiseAnd, 365 TokenType.CARET: exp.BitwiseXor, 366 TokenType.PIPE: exp.BitwiseOr, 367 TokenType.DPIPE: exp.DPipe, 368 } 369 370 TERM = { 371 TokenType.DASH: exp.Sub, 372 TokenType.PLUS: exp.Add, 373 TokenType.MOD: exp.Mod, 374 TokenType.COLLATE: exp.Collate, 375 } 376 377 FACTOR = { 378 TokenType.DIV: exp.IntDiv, 379 TokenType.LR_ARROW: exp.Distance, 380 TokenType.SLASH: exp.Div, 381 TokenType.STAR: exp.Mul, 382 } 383 384 TIMESTAMPS = { 385 TokenType.TIME, 386 TokenType.TIMESTAMP, 387 TokenType.TIMESTAMPTZ, 388 TokenType.TIMESTAMPLTZ, 389 } 390 391 SET_OPERATIONS = { 392 TokenType.UNION, 393 TokenType.INTERSECT, 394 TokenType.EXCEPT, 395 } 396 397 JOIN_METHODS = { 398 TokenType.NATURAL, 399 TokenType.ASOF, 400 } 401 402 JOIN_SIDES = { 403 TokenType.LEFT, 404 TokenType.RIGHT, 405 TokenType.FULL, 406 } 407 408 JOIN_KINDS = { 409 TokenType.INNER, 410 TokenType.OUTER, 411 TokenType.CROSS, 412 TokenType.SEMI, 413 TokenType.ANTI, 414 } 415 416 JOIN_HINTS: t.Set[str] = set() 417 418 LAMBDAS = { 419 TokenType.ARROW: lambda self, expressions: self.expression( 420 exp.Lambda, 421 this=self._replace_lambda( 422 self._parse_conjunction(), 423 {node.name for node in expressions}, 424 ), 425 expressions=expressions, 426 ), 427 TokenType.FARROW: lambda self, expressions: self.expression( 428 exp.Kwarg, 429 this=exp.var(expressions[0].name), 430 expression=self._parse_conjunction(), 431 ), 432 } 433 434 COLUMN_OPERATORS = { 435 TokenType.DOT: None, 436 TokenType.DCOLON: lambda self, this, to: self.expression( 437 exp.Cast if self.STRICT_CAST else exp.TryCast, 438 this=this, 439 to=to, 440 ), 441 TokenType.ARROW: lambda self, this, path: self.expression( 442 exp.JSONExtract, 443 this=this, 444 expression=path, 445 ), 446 TokenType.DARROW: lambda self, this, path: self.expression( 447 exp.JSONExtractScalar, 448 this=this, 449 expression=path, 450 ), 451 TokenType.HASH_ARROW: lambda self, this, path: self.expression( 452 exp.JSONBExtract, 453 this=this, 454 expression=path, 455 ), 456 TokenType.DHASH_ARROW: lambda self, this, path: self.expression( 457 exp.JSONBExtractScalar, 458 this=this, 459 expression=path, 460 ), 461 TokenType.PLACEHOLDER: lambda self, this, key: self.expression( 462 exp.JSONBContains, 463 this=this, 464 expression=key, 465 ), 466 } 467 468 EXPRESSION_PARSERS = { 469 exp.Cluster: lambda self: self._parse_sort(exp.Cluster, "CLUSTER", "BY"), 470 exp.Column: lambda self: self._parse_column(), 471 exp.Condition: lambda self: self._parse_conjunction(), 472 exp.DataType: lambda self: self._parse_types(), 473 exp.Expression: lambda self: self._parse_statement(), 474 exp.From: lambda self: self._parse_from(), 475 exp.Group: lambda self: self._parse_group(), 476 exp.Having: lambda self: self._parse_having(), 477 exp.Identifier: lambda self: self._parse_id_var(), 478 exp.Join: lambda self: self._parse_join(), 479 exp.Lambda: lambda self: self._parse_lambda(), 480 exp.Lateral: lambda self: self._parse_lateral(), 481 exp.Limit: lambda self: self._parse_limit(), 482 exp.Offset: lambda self: self._parse_offset(), 483 exp.Order: lambda self: self._parse_order(), 484 exp.Ordered: lambda self: self._parse_ordered(), 485 exp.Properties: lambda self: self._parse_properties(), 486 exp.Qualify: lambda self: self._parse_qualify(), 487 exp.Returning: lambda self: self._parse_returning(), 488 exp.Sort: lambda self: self._parse_sort(exp.Sort, "SORT", "BY"), 489 exp.Table: lambda self: self._parse_table_parts(), 
490 exp.TableAlias: lambda self: self._parse_table_alias(), 491 exp.Where: lambda self: self._parse_where(), 492 exp.Window: lambda self: self._parse_named_window(), 493 exp.With: lambda self: self._parse_with(), 494 "JOIN_TYPE": lambda self: self._parse_join_parts(), 495 } 496 497 STATEMENT_PARSERS = { 498 TokenType.ALTER: lambda self: self._parse_alter(), 499 TokenType.BEGIN: lambda self: self._parse_transaction(), 500 TokenType.CACHE: lambda self: self._parse_cache(), 501 TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(), 502 TokenType.COMMENT: lambda self: self._parse_comment(), 503 TokenType.CREATE: lambda self: self._parse_create(), 504 TokenType.DELETE: lambda self: self._parse_delete(), 505 TokenType.DESC: lambda self: self._parse_describe(), 506 TokenType.DESCRIBE: lambda self: self._parse_describe(), 507 TokenType.DROP: lambda self: self._parse_drop(), 508 TokenType.END: lambda self: self._parse_commit_or_rollback(), 509 TokenType.FROM: lambda self: exp.select("*").from_( 510 t.cast(exp.From, self._parse_from(skip_from_token=True)) 511 ), 512 TokenType.INSERT: lambda self: self._parse_insert(), 513 TokenType.LOAD: lambda self: self._parse_load(), 514 TokenType.MERGE: lambda self: self._parse_merge(), 515 TokenType.PIVOT: lambda self: self._parse_simplified_pivot(), 516 TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()), 517 TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(), 518 TokenType.SET: lambda self: self._parse_set(), 519 TokenType.UNCACHE: lambda self: self._parse_uncache(), 520 TokenType.UPDATE: lambda self: self._parse_update(), 521 TokenType.USE: lambda self: self.expression( 522 exp.Use, 523 kind=self._match_texts(("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA")) 524 and exp.var(self._prev.text), 525 this=self._parse_table(schema=False), 526 ), 527 } 528 529 UNARY_PARSERS = { 530 TokenType.PLUS: lambda self: self._parse_unary(), # Unary + is handled as a no-op 531 TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()), 532 TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()), 533 TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()), 534 } 535 536 PRIMARY_PARSERS = { 537 TokenType.STRING: lambda self, token: self.expression( 538 exp.Literal, this=token.text, is_string=True 539 ), 540 TokenType.NUMBER: lambda self, token: self.expression( 541 exp.Literal, this=token.text, is_string=False 542 ), 543 TokenType.STAR: lambda self, _: self.expression( 544 exp.Star, 545 **{"except": self._parse_except(), "replace": self._parse_replace()}, 546 ), 547 TokenType.NULL: lambda self, _: self.expression(exp.Null), 548 TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True), 549 TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False), 550 TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text), 551 TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text), 552 TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text), 553 TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token), 554 TokenType.NATIONAL_STRING: lambda self, token: self.expression( 555 exp.National, this=token.text 556 ), 557 TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text), 558 TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(), 559 } 560 561 
PLACEHOLDER_PARSERS = { 562 TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder), 563 TokenType.PARAMETER: lambda self: self._parse_parameter(), 564 TokenType.COLON: lambda self: self.expression(exp.Placeholder, this=self._prev.text) 565 if self._match_set((TokenType.NUMBER, TokenType.VAR)) 566 else None, 567 } 568 569 RANGE_PARSERS = { 570 TokenType.BETWEEN: lambda self, this: self._parse_between(this), 571 TokenType.GLOB: binary_range_parser(exp.Glob), 572 TokenType.ILIKE: binary_range_parser(exp.ILike), 573 TokenType.IN: lambda self, this: self._parse_in(this), 574 TokenType.IRLIKE: binary_range_parser(exp.RegexpILike), 575 TokenType.IS: lambda self, this: self._parse_is(this), 576 TokenType.LIKE: binary_range_parser(exp.Like), 577 TokenType.OVERLAPS: binary_range_parser(exp.Overlaps), 578 TokenType.RLIKE: binary_range_parser(exp.RegexpLike), 579 TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo), 580 } 581 582 PROPERTY_PARSERS: t.Dict[str, t.Callable] = { 583 "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty), 584 "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty), 585 "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(), 586 "CHARACTER SET": lambda self: self._parse_character_set(), 587 "CHECKSUM": lambda self: self._parse_checksum(), 588 "CLUSTER": lambda self: self._parse_cluster(), 589 "COLLATE": lambda self: self._parse_property_assignment(exp.CollateProperty), 590 "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty), 591 "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs), 592 "DEFINER": lambda self: self._parse_definer(), 593 "DETERMINISTIC": lambda self: self.expression( 594 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 595 ), 596 "DISTKEY": lambda self: self._parse_distkey(), 597 "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty), 598 "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty), 599 "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty), 600 "EXTERNAL": lambda self: self.expression(exp.ExternalProperty), 601 "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs), 602 "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 603 "FREESPACE": lambda self: self._parse_freespace(), 604 "IMMUTABLE": lambda self: self.expression( 605 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 606 ), 607 "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs), 608 "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty), 609 "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"), 610 "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"), 611 "LIKE": lambda self: self._parse_create_like(), 612 "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty), 613 "LOCK": lambda self: self._parse_locking(), 614 "LOCKING": lambda self: self._parse_locking(), 615 "LOG": lambda self, **kwargs: self._parse_log(**kwargs), 616 "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty), 617 "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs), 618 "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True), 619 "NO": lambda self: self._parse_no_property(), 620 "ON": lambda self: self._parse_on_property(), 621 "ORDER BY": lambda self: self._parse_order(skip_order_token=True), 622 "PARTITION BY": 
lambda self: self._parse_partitioned_by(), 623 "PARTITIONED BY": lambda self: self._parse_partitioned_by(), 624 "PARTITIONED_BY": lambda self: self._parse_partitioned_by(), 625 "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True), 626 "RANGE": lambda self: self._parse_dict_range(this="RANGE"), 627 "RETURNS": lambda self: self._parse_returns(), 628 "ROW": lambda self: self._parse_row(), 629 "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty), 630 "SET": lambda self: self.expression(exp.SetProperty, multi=False), 631 "SETTINGS": lambda self: self.expression( 632 exp.SettingsProperty, expressions=self._parse_csv(self._parse_set_item) 633 ), 634 "SORTKEY": lambda self: self._parse_sortkey(), 635 "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"), 636 "STABLE": lambda self: self.expression( 637 exp.StabilityProperty, this=exp.Literal.string("STABLE") 638 ), 639 "STORED": lambda self: self._parse_stored(), 640 "TBLPROPERTIES": lambda self: self._parse_wrapped_csv(self._parse_property), 641 "TEMP": lambda self: self.expression(exp.TemporaryProperty), 642 "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty), 643 "TO": lambda self: self._parse_to_table(), 644 "TRANSIENT": lambda self: self.expression(exp.TransientProperty), 645 "TTL": lambda self: self._parse_ttl(), 646 "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 647 "VOLATILE": lambda self: self._parse_volatile_property(), 648 "WITH": lambda self: self._parse_with_property(), 649 } 650 651 CONSTRAINT_PARSERS = { 652 "AUTOINCREMENT": lambda self: self._parse_auto_increment(), 653 "AUTO_INCREMENT": lambda self: self._parse_auto_increment(), 654 "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False), 655 "CHARACTER SET": lambda self: self.expression( 656 exp.CharacterSetColumnConstraint, this=self._parse_var_or_string() 657 ), 658 "CHECK": lambda self: self.expression( 659 exp.CheckColumnConstraint, this=self._parse_wrapped(self._parse_conjunction) 660 ), 661 "COLLATE": lambda self: self.expression( 662 exp.CollateColumnConstraint, this=self._parse_var() 663 ), 664 "COMMENT": lambda self: self.expression( 665 exp.CommentColumnConstraint, this=self._parse_string() 666 ), 667 "COMPRESS": lambda self: self._parse_compress(), 668 "DEFAULT": lambda self: self.expression( 669 exp.DefaultColumnConstraint, this=self._parse_bitwise() 670 ), 671 "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()), 672 "FOREIGN KEY": lambda self: self._parse_foreign_key(), 673 "FORMAT": lambda self: self.expression( 674 exp.DateFormatColumnConstraint, this=self._parse_var_or_string() 675 ), 676 "GENERATED": lambda self: self._parse_generated_as_identity(), 677 "IDENTITY": lambda self: self._parse_auto_increment(), 678 "INLINE": lambda self: self._parse_inline(), 679 "LIKE": lambda self: self._parse_create_like(), 680 "NOT": lambda self: self._parse_not_constraint(), 681 "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True), 682 "ON": lambda self: self._match(TokenType.UPDATE) 683 and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function()), 684 "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()), 685 "PRIMARY KEY": lambda self: self._parse_primary_key(), 686 "REFERENCES": lambda self: self._parse_references(match=False), 687 "TITLE": lambda self: self.expression( 688 exp.TitleColumnConstraint, 

    ALTER_PARSERS = {
        "ADD": lambda self: self._parse_alter_table_add(),
        "ALTER": lambda self: self._parse_alter_table_alter(),
        "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()),
        "DROP": lambda self: self._parse_alter_table_drop(),
        "RENAME": lambda self: self._parse_alter_table_rename(),
    }

    SCHEMA_UNNAMED_CONSTRAINTS = {"CHECK", "FOREIGN KEY", "LIKE", "PRIMARY KEY", "UNIQUE"}

    NO_PAREN_FUNCTION_PARSERS = {
        TokenType.ANY: lambda self: self.expression(exp.Any, this=self._parse_bitwise()),
        TokenType.CASE: lambda self: self._parse_case(),
        TokenType.IF: lambda self: self._parse_if(),
        TokenType.NEXT_VALUE_FOR: lambda self: self.expression(
            exp.NextValueFor,
            this=self._parse_column(),
            order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order),
        ),
    }

    FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"}

    FUNCTION_PARSERS: t.Dict[str, t.Callable] = {
        "CAST": lambda self: self._parse_cast(self.STRICT_CAST),
        "CONCAT": lambda self: self._parse_concat(),
        "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST),
        "DECODE": lambda self: self._parse_decode(),
        "EXTRACT": lambda self: self._parse_extract(),
        "JSON_OBJECT": lambda self: self._parse_json_object(),
        "LOG": lambda self: self._parse_logarithm(),
        "MATCH": lambda self: self._parse_match_against(),
        "OPENJSON": lambda self: self._parse_open_json(),
        "POSITION": lambda self: self._parse_position(),
        "SAFE_CAST": lambda self: self._parse_cast(False),
        "STRING_AGG": lambda self: self._parse_string_agg(),
        "SUBSTRING": lambda self: self._parse_substring(),
        "TRIM": lambda self: self._parse_trim(),
        "TRY_CAST": lambda self: self._parse_cast(False),
        "TRY_CONVERT": lambda self: self._parse_convert(False),
    }

    QUERY_MODIFIER_PARSERS = {
        "joins": lambda self: list(iter(self._parse_join, None)),
        "laterals": lambda self: list(iter(self._parse_lateral, None)),
        "match": lambda self: self._parse_match_recognize(),
        "where": lambda self: self._parse_where(),
        "group": lambda self: self._parse_group(),
        "having": lambda self: self._parse_having(),
        "qualify": lambda self: self._parse_qualify(),
        "windows": lambda self: self._parse_window_clause(),
        "order": lambda self: self._parse_order(),
        "limit": lambda self: self._parse_limit(),
        "offset": lambda self: self._parse_offset(),
        "locks": lambda self: self._parse_locks(),
        "sample": lambda self: self._parse_table_sample(as_modifier=True),
    }
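
    # Note: QUERY_MODIFIER_PARSERS relies on dict insertion order to try the
    # clauses in SQL order: joins, laterals, MATCH_RECOGNIZE, WHERE, GROUP BY,
    # HAVING, QUALIFY, windows, ORDER BY, LIMIT, OFFSET, locks and sample.
    # Each parser returns None when its clause is absent, so every modifier is
    # optional (see _parse_query_modifiers below).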
{"DEFERRED", "IMMEDIATE", "EXCLUSIVE"} 771 TRANSACTION_CHARACTERISTICS = { 772 "ISOLATION LEVEL REPEATABLE READ", 773 "ISOLATION LEVEL READ COMMITTED", 774 "ISOLATION LEVEL READ UNCOMMITTED", 775 "ISOLATION LEVEL SERIALIZABLE", 776 "READ WRITE", 777 "READ ONLY", 778 } 779 780 INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"} 781 782 CLONE_KINDS = {"TIMESTAMP", "OFFSET", "STATEMENT"} 783 784 WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS} 785 WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER} 786 WINDOW_SIDES = {"FOLLOWING", "PRECEDING"} 787 788 ADD_CONSTRAINT_TOKENS = {TokenType.CONSTRAINT, TokenType.PRIMARY_KEY, TokenType.FOREIGN_KEY} 789 790 STRICT_CAST = True 791 792 CONCAT_NULL_OUTPUTS_STRING = False # A NULL arg in CONCAT yields NULL by default 793 794 CONVERT_TYPE_FIRST = False 795 796 PREFIXED_PIVOT_COLUMNS = False 797 IDENTIFY_PIVOT_STRINGS = False 798 799 LOG_BASE_FIRST = True 800 LOG_DEFAULTS_TO_LN = False 801 802 __slots__ = ( 803 "error_level", 804 "error_message_context", 805 "max_errors", 806 "sql", 807 "errors", 808 "_tokens", 809 "_index", 810 "_curr", 811 "_next", 812 "_prev", 813 "_prev_comments", 814 ) 815 816 # Autofilled 817 INDEX_OFFSET: int = 0 818 UNNEST_COLUMN_ONLY: bool = False 819 ALIAS_POST_TABLESAMPLE: bool = False 820 STRICT_STRING_CONCAT = False 821 NULL_ORDERING: str = "nulls_are_small" 822 SHOW_TRIE: t.Dict = {} 823 SET_TRIE: t.Dict = {} 824 FORMAT_MAPPING: t.Dict[str, str] = {} 825 FORMAT_TRIE: t.Dict = {} 826 TIME_MAPPING: t.Dict[str, str] = {} 827 TIME_TRIE: t.Dict = {} 828 829 def __init__( 830 self, 831 error_level: t.Optional[ErrorLevel] = None, 832 error_message_context: int = 100, 833 max_errors: int = 3, 834 ): 835 self.error_level = error_level or ErrorLevel.IMMEDIATE 836 self.error_message_context = error_message_context 837 self.max_errors = max_errors 838 self.reset() 839 840 def reset(self): 841 self.sql = "" 842 self.errors = [] 843 self._tokens = [] 844 self._index = 0 845 self._curr = None 846 self._next = None 847 self._prev = None 848 self._prev_comments = None 849 850 def parse( 851 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 852 ) -> t.List[t.Optional[exp.Expression]]: 853 """ 854 Parses a list of tokens and returns a list of syntax trees, one tree 855 per parsed SQL statement. 856 857 Args: 858 raw_tokens: The list of tokens. 859 sql: The original SQL string, used to produce helpful debug messages. 860 861 Returns: 862 The list of the produced syntax trees. 863 """ 864 return self._parse( 865 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 866 ) 867 868 def parse_into( 869 self, 870 expression_types: exp.IntoType, 871 raw_tokens: t.List[Token], 872 sql: t.Optional[str] = None, 873 ) -> t.List[t.Optional[exp.Expression]]: 874 """ 875 Parses a list of tokens into a given Expression type. If a collection of Expression 876 types is given instead, this method will try to parse the token list into each one 877 of them, stopping at the first for which the parsing succeeds. 878 879 Args: 880 expression_types: The expression type(s) to try and parse the token list into. 881 raw_tokens: The list of tokens. 882 sql: The original SQL string, used to produce helpful debug messages. 883 884 Returns: 885 The target Expression. 
886 """ 887 errors = [] 888 for expression_type in ensure_list(expression_types): 889 parser = self.EXPRESSION_PARSERS.get(expression_type) 890 if not parser: 891 raise TypeError(f"No parser registered for {expression_type}") 892 893 try: 894 return self._parse(parser, raw_tokens, sql) 895 except ParseError as e: 896 e.errors[0]["into_expression"] = expression_type 897 errors.append(e) 898 899 raise ParseError( 900 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 901 errors=merge_errors(errors), 902 ) from errors[-1] 903 904 def _parse( 905 self, 906 parse_method: t.Callable[[Parser], t.Optional[exp.Expression]], 907 raw_tokens: t.List[Token], 908 sql: t.Optional[str] = None, 909 ) -> t.List[t.Optional[exp.Expression]]: 910 self.reset() 911 self.sql = sql or "" 912 913 total = len(raw_tokens) 914 chunks: t.List[t.List[Token]] = [[]] 915 916 for i, token in enumerate(raw_tokens): 917 if token.token_type == TokenType.SEMICOLON: 918 if i < total - 1: 919 chunks.append([]) 920 else: 921 chunks[-1].append(token) 922 923 expressions = [] 924 925 for tokens in chunks: 926 self._index = -1 927 self._tokens = tokens 928 self._advance() 929 930 expressions.append(parse_method(self)) 931 932 if self._index < len(self._tokens): 933 self.raise_error("Invalid expression / Unexpected token") 934 935 self.check_errors() 936 937 return expressions 938 939 def check_errors(self) -> None: 940 """Logs or raises any found errors, depending on the chosen error level setting.""" 941 if self.error_level == ErrorLevel.WARN: 942 for error in self.errors: 943 logger.error(str(error)) 944 elif self.error_level == ErrorLevel.RAISE and self.errors: 945 raise ParseError( 946 concat_messages(self.errors, self.max_errors), 947 errors=merge_errors(self.errors), 948 ) 949 950 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 951 """ 952 Appends an error in the list of recorded errors or raises it, depending on the chosen 953 error level setting. 954 """ 955 token = token or self._curr or self._prev or Token.string("") 956 start = token.start 957 end = token.end + 1 958 start_context = self.sql[max(start - self.error_message_context, 0) : start] 959 highlight = self.sql[start:end] 960 end_context = self.sql[end : end + self.error_message_context] 961 962 error = ParseError.new( 963 f"{message}. Line {token.line}, Col: {token.col}.\n" 964 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 965 description=message, 966 line=token.line, 967 col=token.col, 968 start_context=start_context, 969 highlight=highlight, 970 end_context=end_context, 971 ) 972 973 if self.error_level == ErrorLevel.IMMEDIATE: 974 raise error 975 976 self.errors.append(error) 977 978 def expression( 979 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 980 ) -> E: 981 """ 982 Creates a new, validated Expression. 983 984 Args: 985 exp_class: The expression class to instantiate. 986 comments: An optional list of comments to attach to the expression. 987 kwargs: The arguments to set for the expression along with their respective values. 988 989 Returns: 990 The target expression. 
991 """ 992 instance = exp_class(**kwargs) 993 instance.add_comments(comments) if comments else self._add_comments(instance) 994 return self.validate_expression(instance) 995 996 def _add_comments(self, expression: t.Optional[exp.Expression]) -> None: 997 if expression and self._prev_comments: 998 expression.add_comments(self._prev_comments) 999 self._prev_comments = None 1000 1001 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1002 """ 1003 Validates an Expression, making sure that all its mandatory arguments are set. 1004 1005 Args: 1006 expression: The expression to validate. 1007 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1008 1009 Returns: 1010 The validated expression. 1011 """ 1012 if self.error_level != ErrorLevel.IGNORE: 1013 for error_message in expression.error_messages(args): 1014 self.raise_error(error_message) 1015 1016 return expression 1017 1018 def _find_sql(self, start: Token, end: Token) -> str: 1019 return self.sql[start.start : end.end + 1] 1020 1021 def _advance(self, times: int = 1) -> None: 1022 self._index += times 1023 self._curr = seq_get(self._tokens, self._index) 1024 self._next = seq_get(self._tokens, self._index + 1) 1025 1026 if self._index > 0: 1027 self._prev = self._tokens[self._index - 1] 1028 self._prev_comments = self._prev.comments 1029 else: 1030 self._prev = None 1031 self._prev_comments = None 1032 1033 def _retreat(self, index: int) -> None: 1034 if index != self._index: 1035 self._advance(index - self._index) 1036 1037 def _parse_command(self) -> exp.Command: 1038 return self.expression(exp.Command, this=self._prev.text, expression=self._parse_string()) 1039 1040 def _parse_comment(self, allow_exists: bool = True) -> exp.Expression: 1041 start = self._prev 1042 exists = self._parse_exists() if allow_exists else None 1043 1044 self._match(TokenType.ON) 1045 1046 kind = self._match_set(self.CREATABLES) and self._prev 1047 if not kind: 1048 return self._parse_as_command(start) 1049 1050 if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1051 this = self._parse_user_defined_function(kind=kind.token_type) 1052 elif kind.token_type == TokenType.TABLE: 1053 this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS) 1054 elif kind.token_type == TokenType.COLUMN: 1055 this = self._parse_column() 1056 else: 1057 this = self._parse_id_var() 1058 1059 self._match(TokenType.IS) 1060 1061 return self.expression( 1062 exp.Comment, this=this, kind=kind.text, expression=self._parse_string(), exists=exists 1063 ) 1064 1065 def _parse_to_table( 1066 self, 1067 ) -> exp.ToTableProperty: 1068 table = self._parse_table_parts(schema=True) 1069 return self.expression(exp.ToTableProperty, this=table) 1070 1071 # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl 1072 def _parse_ttl(self) -> exp.Expression: 1073 def _parse_ttl_action() -> t.Optional[exp.Expression]: 1074 this = self._parse_bitwise() 1075 1076 if self._match_text_seq("DELETE"): 1077 return self.expression(exp.MergeTreeTTLAction, this=this, delete=True) 1078 if self._match_text_seq("RECOMPRESS"): 1079 return self.expression( 1080 exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise() 1081 ) 1082 if self._match_text_seq("TO", "DISK"): 1083 return self.expression( 1084 exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string() 1085 ) 1086 if self._match_text_seq("TO", "VOLUME"): 1087 return self.expression( 1088 

    def _parse_command(self) -> exp.Command:
        return self.expression(exp.Command, this=self._prev.text, expression=self._parse_string())

    def _parse_comment(self, allow_exists: bool = True) -> exp.Expression:
        start = self._prev
        exists = self._parse_exists() if allow_exists else None

        self._match(TokenType.ON)

        kind = self._match_set(self.CREATABLES) and self._prev
        if not kind:
            return self._parse_as_command(start)

        if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=kind.token_type)
        elif kind.token_type == TokenType.TABLE:
            this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS)
        elif kind.token_type == TokenType.COLUMN:
            this = self._parse_column()
        else:
            this = self._parse_id_var()

        self._match(TokenType.IS)

        return self.expression(
            exp.Comment, this=this, kind=kind.text, expression=self._parse_string(), exists=exists
        )

    def _parse_to_table(
        self,
    ) -> exp.ToTableProperty:
        table = self._parse_table_parts(schema=True)
        return self.expression(exp.ToTableProperty, this=table)

    # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl
    def _parse_ttl(self) -> exp.Expression:
        def _parse_ttl_action() -> t.Optional[exp.Expression]:
            this = self._parse_bitwise()

            if self._match_text_seq("DELETE"):
                return self.expression(exp.MergeTreeTTLAction, this=this, delete=True)
            if self._match_text_seq("RECOMPRESS"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise()
                )
            if self._match_text_seq("TO", "DISK"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string()
                )
            if self._match_text_seq("TO", "VOLUME"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string()
                )

            return this

        expressions = self._parse_csv(_parse_ttl_action)
        where = self._parse_where()
        group = self._parse_group()

        aggregates = None
        if group and self._match(TokenType.SET):
            aggregates = self._parse_csv(self._parse_set_item)

        return self.expression(
            exp.MergeTreeTTL,
            expressions=expressions,
            where=where,
            group=group,
            aggregates=aggregates,
        )

    def _parse_statement(self) -> t.Optional[exp.Expression]:
        if self._curr is None:
            return None

        if self._match_set(self.STATEMENT_PARSERS):
            return self.STATEMENT_PARSERS[self._prev.token_type](self)

        if self._match_set(Tokenizer.COMMANDS):
            return self._parse_command()

        expression = self._parse_expression()
        expression = self._parse_set_operations(expression) if expression else self._parse_select()
        return self._parse_query_modifiers(expression)

    def _parse_drop(self) -> exp.Drop | exp.Command:
        start = self._prev
        temporary = self._match(TokenType.TEMPORARY)
        materialized = self._match_text_seq("MATERIALIZED")

        kind = self._match_set(self.CREATABLES) and self._prev.text
        if not kind:
            return self._parse_as_command(start)

        return self.expression(
            exp.Drop,
            exists=self._parse_exists(),
            this=self._parse_table(schema=True),
            kind=kind,
            temporary=temporary,
            materialized=materialized,
            cascade=self._match_text_seq("CASCADE"),
            constraints=self._match_text_seq("CONSTRAINTS"),
            purge=self._match_text_seq("PURGE"),
        )

    def _parse_exists(self, not_: bool = False) -> t.Optional[bool]:
        return (
            self._match(TokenType.IF)
            and (not not_ or self._match(TokenType.NOT))
            and self._match(TokenType.EXISTS)
        )
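
    # Statement dispatch (see _parse_statement above): STATEMENT_PARSERS entries
    # win first, then tokens in Tokenizer.COMMANDS fall back to an opaque
    # exp.Command, and anything else is parsed as an expression or a SELECT.
    # Unsupported DDL similarly degrades to exp.Command via _parse_as_command.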

    def _parse_create(self) -> exp.Create | exp.Command:
        # Note: this can't be None because we've matched a statement parser
        start = self._prev
        replace = start.text.upper() == "REPLACE" or self._match_pair(
            TokenType.OR, TokenType.REPLACE
        )
        unique = self._match(TokenType.UNIQUE)

        if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False):
            self._advance()

        properties = None
        create_token = self._match_set(self.CREATABLES) and self._prev

        if not create_token:
            # exp.Properties.Location.POST_CREATE
            properties = self._parse_properties()
            create_token = self._match_set(self.CREATABLES) and self._prev

            if not properties or not create_token:
                return self._parse_as_command(start)

        exists = self._parse_exists(not_=True)
        this = None
        expression = None
        indexes = None
        no_schema_binding = None
        begin = None
        clone = None

        def extend_props(temp_props: t.Optional[exp.Properties]) -> None:
            nonlocal properties
            if properties and temp_props:
                properties.expressions.extend(temp_props.expressions)
            elif temp_props:
                properties = temp_props

        if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=create_token.token_type)

            # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature)
            extend_props(self._parse_properties())

            self._match(TokenType.ALIAS)
            begin = self._match(TokenType.BEGIN)
            return_ = self._match_text_seq("RETURN")
            expression = self._parse_statement()

            if return_:
                expression = self.expression(exp.Return, this=expression)
        elif create_token.token_type == TokenType.INDEX:
            this = self._parse_index(index=self._parse_id_var())
        elif create_token.token_type in self.DB_CREATABLES:
            table_parts = self._parse_table_parts(schema=True)

            # exp.Properties.Location.POST_NAME
            self._match(TokenType.COMMA)
            extend_props(self._parse_properties(before=True))

            this = self._parse_schema(this=table_parts)

            # exp.Properties.Location.POST_SCHEMA and POST_WITH
            extend_props(self._parse_properties())

            self._match(TokenType.ALIAS)
            if not self._match_set(self.DDL_SELECT_TOKENS, advance=False):
                # exp.Properties.Location.POST_ALIAS
                extend_props(self._parse_properties())

            expression = self._parse_ddl_select()

            if create_token.token_type == TokenType.TABLE:
                indexes = []
                while True:
                    index = self._parse_index()

                    # exp.Properties.Location.POST_EXPRESSION and POST_INDEX
                    extend_props(self._parse_properties())

                    if not index:
                        break
                    else:
                        self._match(TokenType.COMMA)
                        indexes.append(index)
            elif create_token.token_type == TokenType.VIEW:
                if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"):
                    no_schema_binding = True

        if self._match_text_seq("CLONE"):
            clone = self._parse_table(schema=True)
            when = self._match_texts({"AT", "BEFORE"}) and self._prev.text.upper()
            clone_kind = (
                self._match(TokenType.L_PAREN)
                and self._match_texts(self.CLONE_KINDS)
                and self._prev.text.upper()
            )
            clone_expression = self._match(TokenType.FARROW) and self._parse_bitwise()
            self._match(TokenType.R_PAREN)
            clone = self.expression(
                exp.Clone, this=clone, when=when, kind=clone_kind, expression=clone_expression
            )

        return self.expression(
            exp.Create,
            this=this,
            kind=create_token.text,
            replace=replace,
            unique=unique,
            expression=expression,
            exists=exists,
            properties=properties,
            indexes=indexes,
            no_schema_binding=no_schema_binding,
            begin=begin,
            clone=clone,
        )

    def _parse_property_before(self) -> t.Optional[exp.Expression]:
        # only used for teradata currently
        self._match(TokenType.COMMA)

        kwargs = {
            "no": self._match_text_seq("NO"),
            "dual": self._match_text_seq("DUAL"),
            "before": self._match_text_seq("BEFORE"),
            "default": self._match_text_seq("DEFAULT"),
            "local": (self._match_text_seq("LOCAL") and "LOCAL")
            or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"),
            "after": self._match_text_seq("AFTER"),
            "minimum": self._match_texts(("MIN", "MINIMUM")),
            "maximum": self._match_texts(("MAX", "MAXIMUM")),
        }

        if self._match_texts(self.PROPERTY_PARSERS):
            parser = self.PROPERTY_PARSERS[self._prev.text.upper()]
            try:
                return parser(self, **{k: v for k, v in kwargs.items() if v})
            except TypeError:
                self.raise_error(f"Cannot parse property '{self._prev.text}'")

        return None

    def _parse_property(self) -> t.Optional[exp.Expression]:
        if self._match_texts(self.PROPERTY_PARSERS):
            return self.PROPERTY_PARSERS[self._prev.text.upper()](self)

        if self._match_pair(TokenType.DEFAULT, TokenType.CHARACTER_SET):
            return self._parse_character_set(default=True)

        if self._match_text_seq("COMPOUND", "SORTKEY"):
            return self._parse_sortkey(compound=True)

        if self._match_text_seq("SQL", "SECURITY"):
            return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER"))

        assignment = self._match_pair(
            TokenType.VAR, TokenType.EQ, advance=False
        ) or self._match_pair(TokenType.STRING, TokenType.EQ, advance=False)

        if assignment:
            key = self._parse_var_or_string()
            self._match(TokenType.EQ)
            return self.expression(exp.Property, this=key, value=self._parse_column())

        return None

    def _parse_stored(self) -> exp.FileFormatProperty:
        self._match(TokenType.ALIAS)

        input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None
        output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None

        return self.expression(
            exp.FileFormatProperty,
            this=self.expression(
                exp.InputOutputFormat, input_format=input_format, output_format=output_format
            )
            if input_format or output_format
            else self._parse_var_or_string() or self._parse_number() or self._parse_id_var(),
        )

    def _parse_property_assignment(self, exp_class: t.Type[E]) -> E:
        self._match(TokenType.EQ)
        self._match(TokenType.ALIAS)
        return self.expression(exp_class, this=self._parse_field())

    def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]:
        properties = []
        while True:
            if before:
                prop = self._parse_property_before()
            else:
                prop = self._parse_property()

            if not prop:
                break
            for p in ensure_list(prop):
                properties.append(p)

        if properties:
            return self.expression(exp.Properties, expressions=properties)

        return None

    def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty:
        return self.expression(
            exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION")
        )

    def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty:
        if self._index >= 2:
            pre_volatile_token = self._tokens[self._index - 2]
        else:
            pre_volatile_token = None

        if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS:
            return exp.VolatileProperty()

        return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE"))

    def _parse_with_property(
        self,
    ) -> t.Optional[exp.Expression] | t.List[t.Optional[exp.Expression]]:
        self._match(TokenType.WITH)
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_property)

        if self._match_text_seq("JOURNAL"):
            return self._parse_withjournaltable()

        if self._match_text_seq("DATA"):
            return self._parse_withdata(no=False)
        elif self._match_text_seq("NO", "DATA"):
            return self._parse_withdata(no=True)

        if not self._next:
            return None

        return self._parse_withisolatedloading()

    # https://dev.mysql.com/doc/refman/8.0/en/create-view.html
    def _parse_definer(self) -> t.Optional[exp.DefinerProperty]:
        self._match(TokenType.EQ)

        user = self._parse_id_var()
        self._match(TokenType.PARAMETER)
        host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text)

        if not user or not host:
            return None

        return exp.DefinerProperty(this=f"{user}@{host}")
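
    # Example: for the MySQL clause DEFINER = `admin`@`localhost`, _parse_definer
    # reads the user and host identifiers around the "@" and yields
    # exp.DefinerProperty(this="admin@localhost").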

    def _parse_withjournaltable(self) -> exp.WithJournalTableProperty:
        self._match(TokenType.TABLE)
        self._match(TokenType.EQ)
        return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts())

    def _parse_log(self, no: bool = False) -> exp.LogProperty:
        return self.expression(exp.LogProperty, no=no)

    def _parse_journal(self, **kwargs) -> exp.JournalProperty:
        return self.expression(exp.JournalProperty, **kwargs)

    def _parse_checksum(self) -> exp.ChecksumProperty:
        self._match(TokenType.EQ)

        on = None
        if self._match(TokenType.ON):
            on = True
        elif self._match_text_seq("OFF"):
            on = False

        return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT))

    def _parse_cluster(self) -> t.Optional[exp.Cluster]:
        if not self._match_text_seq("BY"):
            self._retreat(self._index - 1)
            return None

        return self.expression(exp.Cluster, expressions=self._parse_csv(self._parse_ordered))

    def _parse_freespace(self) -> exp.FreespaceProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT)
        )

    def _parse_mergeblockratio(
        self, no: bool = False, default: bool = False
    ) -> exp.MergeBlockRatioProperty:
        if self._match(TokenType.EQ):
            return self.expression(
                exp.MergeBlockRatioProperty,
                this=self._parse_number(),
                percent=self._match(TokenType.PERCENT),
            )

        return self.expression(exp.MergeBlockRatioProperty, no=no, default=default)

    def _parse_datablocksize(
        self,
        default: t.Optional[bool] = None,
        minimum: t.Optional[bool] = None,
        maximum: t.Optional[bool] = None,
    ) -> exp.DataBlocksizeProperty:
        self._match(TokenType.EQ)
        size = self._parse_number()

        units = None
        if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")):
            units = self._prev.text

        return self.expression(
            exp.DataBlocksizeProperty,
            size=size,
            units=units,
            default=default,
            minimum=minimum,
            maximum=maximum,
        )

    def _parse_blockcompression(self) -> exp.BlockCompressionProperty:
        self._match(TokenType.EQ)
        always = self._match_text_seq("ALWAYS")
        manual = self._match_text_seq("MANUAL")
        never = self._match_text_seq("NEVER")
        default = self._match_text_seq("DEFAULT")

        autotemp = None
        if self._match_text_seq("AUTOTEMP"):
            autotemp = self._parse_schema()

        return self.expression(
            exp.BlockCompressionProperty,
            always=always,
            manual=manual,
            never=never,
            default=default,
            autotemp=autotemp,
        )

    def _parse_withisolatedloading(self) -> exp.IsolatedLoadingProperty:
        no = self._match_text_seq("NO")
        concurrent = self._match_text_seq("CONCURRENT")
        self._match_text_seq("ISOLATED", "LOADING")
        for_all = self._match_text_seq("FOR", "ALL")
        for_insert = self._match_text_seq("FOR", "INSERT")
        for_none = self._match_text_seq("FOR", "NONE")
        return self.expression(
            exp.IsolatedLoadingProperty,
            no=no,
            concurrent=concurrent,
            for_all=for_all,
            for_insert=for_insert,
            for_none=for_none,
        )
self._match_text_seq("DATABASE"): 1517 kind = "DATABASE" 1518 else: 1519 kind = None 1520 1521 if kind in ("DATABASE", "TABLE", "VIEW"): 1522 this = self._parse_table_parts() 1523 else: 1524 this = None 1525 1526 if self._match(TokenType.FOR): 1527 for_or_in = "FOR" 1528 elif self._match(TokenType.IN): 1529 for_or_in = "IN" 1530 else: 1531 for_or_in = None 1532 1533 if self._match_text_seq("ACCESS"): 1534 lock_type = "ACCESS" 1535 elif self._match_texts(("EXCL", "EXCLUSIVE")): 1536 lock_type = "EXCLUSIVE" 1537 elif self._match_text_seq("SHARE"): 1538 lock_type = "SHARE" 1539 elif self._match_text_seq("READ"): 1540 lock_type = "READ" 1541 elif self._match_text_seq("WRITE"): 1542 lock_type = "WRITE" 1543 elif self._match_text_seq("CHECKSUM"): 1544 lock_type = "CHECKSUM" 1545 else: 1546 lock_type = None 1547 1548 override = self._match_text_seq("OVERRIDE") 1549 1550 return self.expression( 1551 exp.LockingProperty, 1552 this=this, 1553 kind=kind, 1554 for_or_in=for_or_in, 1555 lock_type=lock_type, 1556 override=override, 1557 ) 1558 1559 def _parse_partition_by(self) -> t.List[t.Optional[exp.Expression]]: 1560 if self._match(TokenType.PARTITION_BY): 1561 return self._parse_csv(self._parse_conjunction) 1562 return [] 1563 1564 def _parse_partitioned_by(self) -> exp.PartitionedByProperty: 1565 self._match(TokenType.EQ) 1566 return self.expression( 1567 exp.PartitionedByProperty, 1568 this=self._parse_schema() or self._parse_bracket(self._parse_field()), 1569 ) 1570 1571 def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty: 1572 if self._match_text_seq("AND", "STATISTICS"): 1573 statistics = True 1574 elif self._match_text_seq("AND", "NO", "STATISTICS"): 1575 statistics = False 1576 else: 1577 statistics = None 1578 1579 return self.expression(exp.WithDataProperty, no=no, statistics=statistics) 1580 1581 def _parse_no_property(self) -> t.Optional[exp.NoPrimaryIndexProperty]: 1582 if self._match_text_seq("PRIMARY", "INDEX"): 1583 return exp.NoPrimaryIndexProperty() 1584 return None 1585 1586 def _parse_on_property(self) -> t.Optional[exp.Expression]: 1587 if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"): 1588 return exp.OnCommitProperty() 1589 elif self._match_text_seq("COMMIT", "DELETE", "ROWS"): 1590 return exp.OnCommitProperty(delete=True) 1591 return None 1592 1593 def _parse_distkey(self) -> exp.DistKeyProperty: 1594 return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var)) 1595 1596 def _parse_create_like(self) -> t.Optional[exp.LikeProperty]: 1597 table = self._parse_table(schema=True) 1598 1599 options = [] 1600 while self._match_texts(("INCLUDING", "EXCLUDING")): 1601 this = self._prev.text.upper() 1602 1603 id_var = self._parse_id_var() 1604 if not id_var: 1605 return None 1606 1607 options.append( 1608 self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper())) 1609 ) 1610 1611 return self.expression(exp.LikeProperty, this=table, expressions=options) 1612 1613 def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty: 1614 return self.expression( 1615 exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound 1616 ) 1617 1618 def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty: 1619 self._match(TokenType.EQ) 1620 return self.expression( 1621 exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default 1622 ) 1623 1624 def _parse_returns(self) -> exp.ReturnsProperty: 1625 value: t.Optional[exp.Expression] 1626 is_table = 

    def _parse_returns(self) -> exp.ReturnsProperty:
        value: t.Optional[exp.Expression]
        is_table = self._match(TokenType.TABLE)

        if is_table:
            if self._match(TokenType.LT):
                value = self.expression(
                    exp.Schema,
                    this="TABLE",
                    expressions=self._parse_csv(self._parse_struct_types),
                )
                if not self._match(TokenType.GT):
                    self.raise_error("Expecting >")
            else:
                value = self._parse_schema(exp.var("TABLE"))
        else:
            value = self._parse_types()

        return self.expression(exp.ReturnsProperty, this=value, is_table=is_table)

    def _parse_describe(self) -> exp.Describe:
        kind = self._match_set(self.CREATABLES) and self._prev.text
        this = self._parse_table()
        return self.expression(exp.Describe, this=this, kind=kind)

    def _parse_insert(self) -> exp.Insert:
        overwrite = self._match(TokenType.OVERWRITE)
        local = self._match_text_seq("LOCAL")
        alternative = None

        if self._match_text_seq("DIRECTORY"):
            this: t.Optional[exp.Expression] = self.expression(
                exp.Directory,
                this=self._parse_var_or_string(),
                local=local,
                row_format=self._parse_row_format(match_row=True),
            )
        else:
            if self._match(TokenType.OR):
                alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text

            self._match(TokenType.INTO)
            self._match(TokenType.TABLE)
            this = self._parse_table(schema=True)

        return self.expression(
            exp.Insert,
            this=this,
            exists=self._parse_exists(),
            partition=self._parse_partition(),
            expression=self._parse_ddl_select(),
            conflict=self._parse_on_conflict(),
            returning=self._parse_returning(),
            overwrite=overwrite,
            alternative=alternative,
        )

    def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]:
        conflict = self._match_text_seq("ON", "CONFLICT")
        duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY")

        if not conflict and not duplicate:
            return None

        nothing = None
        expressions = None
        key = None
        constraint = None

        if conflict:
            if self._match_text_seq("ON", "CONSTRAINT"):
                constraint = self._parse_id_var()
            else:
                key = self._parse_csv(self._parse_value)

        self._match_text_seq("DO")
        if self._match_text_seq("NOTHING"):
            nothing = True
        else:
            self._match(TokenType.UPDATE)
            expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality)

        return self.expression(
            exp.OnConflict,
            duplicate=duplicate,
            expressions=expressions,
            nothing=nothing,
            key=key,
            constraint=constraint,
        )

    def _parse_returning(self) -> t.Optional[exp.Returning]:
        if not self._match(TokenType.RETURNING):
            return None

        return self.expression(exp.Returning, expressions=self._parse_csv(self._parse_column))

    def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]:
        if not self._match(TokenType.FORMAT):
            return None
        return self._parse_row_format()
kwargs["fields"] = self._parse_string() 1741 if self._match_text_seq("ESCAPED", "BY"): 1742 kwargs["escaped"] = self._parse_string() 1743 if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"): 1744 kwargs["collection_items"] = self._parse_string() 1745 if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"): 1746 kwargs["map_keys"] = self._parse_string() 1747 if self._match_text_seq("LINES", "TERMINATED", "BY"): 1748 kwargs["lines"] = self._parse_string() 1749 if self._match_text_seq("NULL", "DEFINED", "AS"): 1750 kwargs["null"] = self._parse_string() 1751 1752 return self.expression(exp.RowFormatDelimitedProperty, **kwargs) # type: ignore 1753 1754 def _parse_load(self) -> exp.LoadData | exp.Command: 1755 if self._match_text_seq("DATA"): 1756 local = self._match_text_seq("LOCAL") 1757 self._match_text_seq("INPATH") 1758 inpath = self._parse_string() 1759 overwrite = self._match(TokenType.OVERWRITE) 1760 self._match_pair(TokenType.INTO, TokenType.TABLE) 1761 1762 return self.expression( 1763 exp.LoadData, 1764 this=self._parse_table(schema=True), 1765 local=local, 1766 overwrite=overwrite, 1767 inpath=inpath, 1768 partition=self._parse_partition(), 1769 input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(), 1770 serde=self._match_text_seq("SERDE") and self._parse_string(), 1771 ) 1772 return self._parse_as_command(self._prev) 1773 1774 def _parse_delete(self) -> exp.Delete: 1775 self._match(TokenType.FROM) 1776 1777 return self.expression( 1778 exp.Delete, 1779 this=self._parse_table(), 1780 using=self._parse_csv(lambda: self._match(TokenType.USING) and self._parse_table()), 1781 where=self._parse_where(), 1782 returning=self._parse_returning(), 1783 ) 1784 1785 def _parse_update(self) -> exp.Update: 1786 return self.expression( 1787 exp.Update, 1788 **{ # type: ignore 1789 "this": self._parse_table(alias_tokens=self.UPDATE_ALIAS_TOKENS), 1790 "expressions": self._match(TokenType.SET) and self._parse_csv(self._parse_equality), 1791 "from": self._parse_from(modifiers=True), 1792 "where": self._parse_where(), 1793 "returning": self._parse_returning(), 1794 }, 1795 ) 1796 1797 def _parse_uncache(self) -> exp.Uncache: 1798 if not self._match(TokenType.TABLE): 1799 self.raise_error("Expecting TABLE after UNCACHE") 1800 1801 return self.expression( 1802 exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True) 1803 ) 1804 1805 def _parse_cache(self) -> exp.Cache: 1806 lazy = self._match_text_seq("LAZY") 1807 self._match(TokenType.TABLE) 1808 table = self._parse_table(schema=True) 1809 1810 options = [] 1811 if self._match_text_seq("OPTIONS"): 1812 self._match_l_paren() 1813 k = self._parse_string() 1814 self._match(TokenType.EQ) 1815 v = self._parse_string() 1816 options = [k, v] 1817 self._match_r_paren() 1818 1819 self._match(TokenType.ALIAS) 1820 return self.expression( 1821 exp.Cache, 1822 this=table, 1823 lazy=lazy, 1824 options=options, 1825 expression=self._parse_select(nested=True), 1826 ) 1827 1828 def _parse_partition(self) -> t.Optional[exp.Partition]: 1829 if not self._match(TokenType.PARTITION): 1830 return None 1831 1832 return self.expression( 1833 exp.Partition, expressions=self._parse_wrapped_csv(self._parse_conjunction) 1834 ) 1835 1836 def _parse_value(self) -> exp.Tuple: 1837 if self._match(TokenType.L_PAREN): 1838 expressions = self._parse_csv(self._parse_conjunction) 1839 self._match_r_paren() 1840 return self.expression(exp.Tuple, expressions=expressions) 1841 1842 # In presto we can have VALUES 1, 2 which results in 
1 column & 2 rows. 1843 # Source: https://prestodb.io/docs/current/sql/values.html 1844 return self.expression(exp.Tuple, expressions=[self._parse_conjunction()]) 1845 1846 def _parse_select( 1847 self, nested: bool = False, table: bool = False, parse_subquery_alias: bool = True 1848 ) -> t.Optional[exp.Expression]: 1849 cte = self._parse_with() 1850 if cte: 1851 this = self._parse_statement() 1852 1853 if not this: 1854 self.raise_error("Failed to parse any statement following CTE") 1855 return cte 1856 1857 if "with" in this.arg_types: 1858 this.set("with", cte) 1859 else: 1860 self.raise_error(f"{this.key} does not support CTE") 1861 this = cte 1862 elif self._match(TokenType.SELECT): 1863 comments = self._prev_comments 1864 1865 hint = self._parse_hint() 1866 all_ = self._match(TokenType.ALL) 1867 distinct = self._match(TokenType.DISTINCT) 1868 1869 kind = ( 1870 self._match(TokenType.ALIAS) 1871 and self._match_texts(("STRUCT", "VALUE")) 1872 and self._prev.text 1873 ) 1874 1875 if distinct: 1876 distinct = self.expression( 1877 exp.Distinct, 1878 on=self._parse_value() if self._match(TokenType.ON) else None, 1879 ) 1880 1881 if all_ and distinct: 1882 self.raise_error("Cannot specify both ALL and DISTINCT after SELECT") 1883 1884 limit = self._parse_limit(top=True) 1885 expressions = self._parse_csv(self._parse_expression) 1886 1887 this = self.expression( 1888 exp.Select, 1889 kind=kind, 1890 hint=hint, 1891 distinct=distinct, 1892 expressions=expressions, 1893 limit=limit, 1894 ) 1895 this.comments = comments 1896 1897 into = self._parse_into() 1898 if into: 1899 this.set("into", into) 1900 1901 from_ = self._parse_from() 1902 if from_: 1903 this.set("from", from_) 1904 1905 this = self._parse_query_modifiers(this) 1906 elif (table or nested) and self._match(TokenType.L_PAREN): 1907 if self._match(TokenType.PIVOT): 1908 this = self._parse_simplified_pivot() 1909 elif self._match(TokenType.FROM): 1910 this = exp.select("*").from_( 1911 t.cast(exp.From, self._parse_from(skip_from_token=True)) 1912 ) 1913 else: 1914 this = self._parse_table() if table else self._parse_select(nested=True) 1915 this = self._parse_set_operations(self._parse_query_modifiers(this)) 1916 1917 self._match_r_paren() 1918 1919 # early return so that subquery unions aren't parsed again 1920 # SELECT * FROM (SELECT 1) UNION ALL SELECT 1 1921 # Union ALL should be a property of the top select node, not the subquery 1922 return self._parse_subquery(this, parse_alias=parse_subquery_alias) 1923 elif self._match(TokenType.VALUES): 1924 this = self.expression( 1925 exp.Values, 1926 expressions=self._parse_csv(self._parse_value), 1927 alias=self._parse_table_alias(), 1928 ) 1929 else: 1930 this = None 1931 1932 return self._parse_set_operations(this) 1933 1934 def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]: 1935 if not skip_with_token and not self._match(TokenType.WITH): 1936 return None 1937 1938 comments = self._prev_comments 1939 recursive = self._match(TokenType.RECURSIVE) 1940 1941 expressions = [] 1942 while True: 1943 expressions.append(self._parse_cte()) 1944 1945 if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH): 1946 break 1947 else: 1948 self._match(TokenType.WITH) 1949 1950 return self.expression( 1951 exp.With, comments=comments, expressions=expressions, recursive=recursive 1952 ) 1953 1954 def _parse_cte(self) -> exp.CTE: 1955 alias = self._parse_table_alias() 1956 if not alias or not alias.this: 1957 self.raise_error("Expected CTE to have alias") 1958 

    def _parse_table_alias(
        self, alias_tokens: t.Optional[t.Collection[TokenType]] = None
    ) -> t.Optional[exp.TableAlias]:
        any_token = self._match(TokenType.ALIAS)
        alias = (
            self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
            or self._parse_string_as_identifier()
        )

        index = self._index
        if self._match(TokenType.L_PAREN):
            columns = self._parse_csv(self._parse_function_parameter)
            self._match_r_paren() if columns else self._retreat(index)
        else:
            columns = None

        if not alias and not columns:
            return None

        return self.expression(exp.TableAlias, this=alias, columns=columns)

    def _parse_subquery(
        self, this: t.Optional[exp.Expression], parse_alias: bool = True
    ) -> t.Optional[exp.Subquery]:
        if not this:
            return None

        return self.expression(
            exp.Subquery,
            this=this,
            pivots=self._parse_pivots(),
            alias=self._parse_table_alias() if parse_alias else None,
        )

    def _parse_query_modifiers(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        if isinstance(this, self.MODIFIABLES):
            for key, parser in self.QUERY_MODIFIER_PARSERS.items():
                expression = parser(self)

                if expression:
                    if key == "limit":
                        offset = expression.args.pop("offset", None)
                        if offset:
                            this.set("offset", exp.Offset(expression=offset))
                    this.set(key, expression)
        return this

    def _parse_hint(self) -> t.Optional[exp.Hint]:
        if self._match(TokenType.HINT):
            hints = self._parse_csv(self._parse_function)

            if not self._match_pair(TokenType.STAR, TokenType.SLASH):
                self.raise_error("Expected */ after HINT")

            return self.expression(exp.Hint, expressions=hints)

        return None

    def _parse_into(self) -> t.Optional[exp.Into]:
        if not self._match(TokenType.INTO):
            return None

        temp = self._match(TokenType.TEMPORARY)
        unlogged = self._match_text_seq("UNLOGGED")
        self._match(TokenType.TABLE)

        return self.expression(
            exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged
        )

    def _parse_from(
        self, modifiers: bool = False, skip_from_token: bool = False
    ) -> t.Optional[exp.From]:
        if not skip_from_token and not self._match(TokenType.FROM):
            return None

        comments = self._prev_comments
        this = self._parse_table()

        return self.expression(
            exp.From,
            comments=comments,
            this=self._parse_query_modifiers(this) if modifiers else this,
        )
2070 text += f" OMIT EMPTY MATCHES" 2071 elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"): 2072 text += f" WITH UNMATCHED ROWS" 2073 rows = exp.var(text) 2074 else: 2075 rows = None 2076 2077 if self._match_text_seq("AFTER", "MATCH", "SKIP"): 2078 text = "AFTER MATCH SKIP" 2079 if self._match_text_seq("PAST", "LAST", "ROW"): 2080 text += f" PAST LAST ROW" 2081 elif self._match_text_seq("TO", "NEXT", "ROW"): 2082 text += f" TO NEXT ROW" 2083 elif self._match_text_seq("TO", "FIRST"): 2084 text += f" TO FIRST {self._advance_any().text}" # type: ignore 2085 elif self._match_text_seq("TO", "LAST"): 2086 text += f" TO LAST {self._advance_any().text}" # type: ignore 2087 after = exp.var(text) 2088 else: 2089 after = None 2090 2091 if self._match_text_seq("PATTERN"): 2092 self._match_l_paren() 2093 2094 if not self._curr: 2095 self.raise_error("Expecting )", self._curr) 2096 2097 paren = 1 2098 start = self._curr 2099 2100 while self._curr and paren > 0: 2101 if self._curr.token_type == TokenType.L_PAREN: 2102 paren += 1 2103 if self._curr.token_type == TokenType.R_PAREN: 2104 paren -= 1 2105 2106 end = self._prev 2107 self._advance() 2108 2109 if paren > 0: 2110 self.raise_error("Expecting )", self._curr) 2111 2112 pattern = exp.var(self._find_sql(start, end)) 2113 else: 2114 pattern = None 2115 2116 define = ( 2117 self._parse_csv( 2118 lambda: self.expression( 2119 exp.Alias, 2120 alias=self._parse_id_var(any_token=True), 2121 this=self._match(TokenType.ALIAS) and self._parse_conjunction(), 2122 ) 2123 ) 2124 if self._match_text_seq("DEFINE") 2125 else None 2126 ) 2127 2128 self._match_r_paren() 2129 2130 return self.expression( 2131 exp.MatchRecognize, 2132 partition_by=partition, 2133 order=order, 2134 measures=measures, 2135 rows=rows, 2136 after=after, 2137 pattern=pattern, 2138 define=define, 2139 alias=self._parse_table_alias(), 2140 ) 2141 2142 def _parse_lateral(self) -> t.Optional[exp.Lateral]: 2143 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY) 2144 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY) 2145 2146 if outer_apply or cross_apply: 2147 this = self._parse_select(table=True) 2148 view = None 2149 outer = not cross_apply 2150 elif self._match(TokenType.LATERAL): 2151 this = self._parse_select(table=True) 2152 view = self._match(TokenType.VIEW) 2153 outer = self._match(TokenType.OUTER) 2154 else: 2155 return None 2156 2157 if not this: 2158 this = self._parse_function() or self._parse_id_var(any_token=False) 2159 while self._match(TokenType.DOT): 2160 this = exp.Dot( 2161 this=this, 2162 expression=self._parse_function() or self._parse_id_var(any_token=False), 2163 ) 2164 2165 if view: 2166 table = self._parse_id_var(any_token=False) 2167 columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else [] 2168 table_alias: t.Optional[exp.TableAlias] = self.expression( 2169 exp.TableAlias, this=table, columns=columns 2170 ) 2171 elif isinstance(this, exp.Subquery) and this.alias: 2172 # Ensures parity between the Subquery's and the Lateral's "alias" args 2173 table_alias = this.args["alias"].copy() 2174 else: 2175 table_alias = self._parse_table_alias() 2176 2177 return self.expression(exp.Lateral, this=this, view=view, outer=outer, alias=table_alias) 2178 2179 def _parse_join_parts( 2180 self, 2181 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 2182 return ( 2183 self._match_set(self.JOIN_METHODS) and self._prev, 2184 self._match_set(self.JOIN_SIDES) and self._prev, 2185 

    def _parse_join(self, skip_join_token: bool = False) -> t.Optional[exp.Join]:
        if self._match(TokenType.COMMA):
            return self.expression(exp.Join, this=self._parse_table())

        index = self._index
        method, side, kind = self._parse_join_parts()
        hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None
        join = self._match(TokenType.JOIN)

        if not skip_join_token and not join:
            self._retreat(index)
            kind = None
            method = None
            side = None

        outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False)
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False)

        if not skip_join_token and not join and not outer_apply and not cross_apply:
            return None

        if outer_apply:
            side = Token(TokenType.LEFT, "LEFT")

        kwargs: t.Dict[str, t.Any] = {"this": self._parse_table()}

        if method:
            kwargs["method"] = method.text
        if side:
            kwargs["side"] = side.text
        if kind:
            kwargs["kind"] = kind.text
        if hint:
            kwargs["hint"] = hint

        if self._match(TokenType.ON):
            kwargs["on"] = self._parse_conjunction()
        elif self._match(TokenType.USING):
            kwargs["using"] = self._parse_wrapped_id_vars()

        return self.expression(exp.Join, **kwargs)

    def _parse_index(
        self,
        index: t.Optional[exp.Expression] = None,
    ) -> t.Optional[exp.Index]:
        if index:
            unique = None
            primary = None
            amp = None

            self._match(TokenType.ON)
            self._match(TokenType.TABLE)  # hive
            table = self._parse_table_parts(schema=True)
        else:
            unique = self._match(TokenType.UNIQUE)
            primary = self._match_text_seq("PRIMARY")
            amp = self._match_text_seq("AMP")

            if not self._match(TokenType.INDEX):
                return None

            index = self._parse_id_var()
            table = None

        using = self._parse_field() if self._match(TokenType.USING) else None

        if self._match(TokenType.L_PAREN, advance=False):
            columns = self._parse_wrapped_csv(self._parse_ordered)
        else:
            columns = None

        return self.expression(
            exp.Index,
            this=index,
            table=table,
            using=using,
            columns=columns,
            unique=unique,
            primary=primary,
            amp=amp,
            partition_by=self._parse_partition_by(),
        )

    def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]:
        return (
            (not schema and self._parse_function(optional_parens=False))
            or self._parse_id_var(any_token=False)
            or self._parse_string_as_identifier()
            or self._parse_placeholder()
        )

    def _parse_table_parts(self, schema: bool = False) -> exp.Table:
        catalog = None
        db = None
        table = self._parse_table_part(schema=schema)

        while self._match(TokenType.DOT):
            if catalog:
                # This allows nesting the table in arbitrarily many dot expressions if needed
                table = self.expression(
                    exp.Dot, this=table, expression=self._parse_table_part(schema=schema)
                )
            else:
                catalog = db
                db = table
                table = self._parse_table_part(schema=schema)

        if not table:
            self.raise_error(f"Expected table name but got {self._curr}")

        return self.expression(
            exp.Table, this=table, db=db, catalog=catalog, pivots=self._parse_pivots()
        )
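
    # Example: "catalog.db.t" is parsed by _parse_table_parts into
    # exp.Table(this=t, db=db, catalog=catalog); any additional leading parts
    # are folded into nested exp.Dot nodes, as the loop above shows.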

    def _parse_table(
        self, schema: bool = False, alias_tokens: t.Optional[t.Collection[TokenType]] = None
    ) -> t.Optional[exp.Expression]:
        lateral = self._parse_lateral()
        if lateral:
            return lateral

        unnest = self._parse_unnest()
        if unnest:
            return unnest

        values = self._parse_derived_table_values()
        if values:
            return values

        subquery = self._parse_select(table=True)
        if subquery:
            if not subquery.args.get("pivots"):
                subquery.set("pivots", self._parse_pivots())
            return subquery

        this: exp.Expression = self._parse_table_parts(schema=schema)

        if schema:
            return self._parse_schema(this=this)

        if self.ALIAS_POST_TABLESAMPLE:
            table_sample = self._parse_table_sample()

        alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
        if alias:
            this.set("alias", alias)

        if not this.args.get("pivots"):
            this.set("pivots", self._parse_pivots())

        if self._match_pair(TokenType.WITH, TokenType.L_PAREN):
            this.set(
                "hints",
                self._parse_csv(lambda: self._parse_function() or self._parse_var(any_token=True)),
            )
            self._match_r_paren()

        if not self.ALIAS_POST_TABLESAMPLE:
            table_sample = self._parse_table_sample()

        if table_sample:
            table_sample.set("this", this)
            this = table_sample

        return this

    def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]:
        if not self._match(TokenType.UNNEST):
            return None

        expressions = self._parse_wrapped_csv(self._parse_type)
        ordinality = self._match_pair(TokenType.WITH, TokenType.ORDINALITY)

        alias = self._parse_table_alias() if with_alias else None

        if alias and self.UNNEST_COLUMN_ONLY:
            if alias.args.get("columns"):
                self.raise_error("Unexpected extra column alias in unnest.")

            alias.set("columns", [alias.this])
            alias.set("this", None)

        offset = None
        if self._match_pair(TokenType.WITH, TokenType.OFFSET):
            self._match(TokenType.ALIAS)
            offset = self._parse_id_var() or exp.to_identifier("offset")

        return self.expression(
            exp.Unnest, expressions=expressions, ordinality=ordinality, alias=alias, offset=offset
        )

    def _parse_derived_table_values(self) -> t.Optional[exp.Values]:
        is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES)
        if not is_derived and not self._match(TokenType.VALUES):
            return None

        expressions = self._parse_csv(self._parse_value)
        alias = self._parse_table_alias()

        if is_derived:
            self._match_r_paren()

        return self.expression(
            exp.Values, expressions=expressions, alias=alias or self._parse_table_alias()
        )

    def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]:
        if not self._match(TokenType.TABLE_SAMPLE) and not (
            as_modifier and self._match_text_seq("USING", "SAMPLE")
        ):
            return None

        bucket_numerator = None
        bucket_denominator = None
        bucket_field = None
        percent = None
        rows = None
        size = None
        seed = None

        kind = (
            self._prev.text if self._prev.token_type == TokenType.TABLE_SAMPLE else "USING SAMPLE"
        )
        method = self._parse_var(tokens=(TokenType.ROW,))

        self._match(TokenType.L_PAREN)

        num = self._parse_number()

        if self._match_text_seq("BUCKET"):
            bucket_numerator = self._parse_number()
            self._match_text_seq("OUT", "OF")
            bucket_denominator = self._parse_number()
            self._match(TokenType.ON)
            bucket_field = self._parse_field()
        elif self._match_set((TokenType.PERCENT, TokenType.MOD)):
            percent = num
        elif self._match(TokenType.ROWS):
            rows = num
        else:
            size = num

        self._match(TokenType.R_PAREN)

        if self._match(TokenType.L_PAREN):
            method = self._parse_var()
            seed = self._match(TokenType.COMMA) and self._parse_number()
            self._match_r_paren()
        elif self._match_texts(("SEED", "REPEATABLE")):
            seed = self._parse_wrapped(self._parse_number)

        return self.expression(
            exp.TableSample,
            method=method,
            bucket_numerator=bucket_numerator,
            bucket_denominator=bucket_denominator,
            bucket_field=bucket_field,
            percent=percent,
            rows=rows,
            size=size,
            seed=seed,
            kind=kind,
        )

    def _parse_pivots(self) -> t.List[t.Optional[exp.Expression]]:
        return list(iter(self._parse_pivot, None))

    # https://duckdb.org/docs/sql/statements/pivot
    def _parse_simplified_pivot(self) -> exp.Pivot:
        def _parse_on() -> t.Optional[exp.Expression]:
            this = self._parse_bitwise()
            return self._parse_in(this) if self._match(TokenType.IN) else this

        this = self._parse_table()
        expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on)
        using = self._match(TokenType.USING) and self._parse_csv(
            lambda: self._parse_alias(self._parse_function())
        )
        group = self._parse_group()
        return self.expression(
            exp.Pivot, this=this, expressions=expressions, using=using, group=group
        )

    def _parse_pivot(self) -> t.Optional[exp.Pivot]:
        index = self._index

        if self._match(TokenType.PIVOT):
            unpivot = False
        elif self._match(TokenType.UNPIVOT):
            unpivot = True
        else:
            return None

        expressions = []
        field = None

        if not self._match(TokenType.L_PAREN):
            self._retreat(index)
            return None

        if unpivot:
            expressions = self._parse_csv(self._parse_column)
        else:
            expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function()))

        if not expressions:
            self.raise_error("Failed to parse PIVOT's aggregation list")

        if not self._match(TokenType.FOR):
            self.raise_error("Expecting FOR")

        value = self._parse_column()

        if not self._match(TokenType.IN):
            self.raise_error("Expecting IN")

        field = self._parse_in(value, alias=True)

        self._match_r_paren()

        pivot = self.expression(exp.Pivot, expressions=expressions, field=field, unpivot=unpivot)

        if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False):
            pivot.set("alias", self._parse_table_alias())

        if not unpivot:
            names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions))

            columns: t.List[exp.Expression] = []
            for fld in pivot.args["field"].expressions:
                field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name
                for name in names:
                    if self.PREFIXED_PIVOT_COLUMNS:
                        name = f"{name}_{field_name}" if name else field_name
                    else:
                        name = f"{field_name}_{name}" if name else field_name

                    columns.append(exp.to_identifier(name))

            pivot.set("columns", columns)

        return pivot

    def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]:
        return [agg.alias for agg in aggregations]
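
    # Example: for PIVOT(SUM(x) AS s FOR f IN ('a', 'b')), the generated output
    # column names combine each IN-value with each aggregation alias: "a_s" and
    # "b_s" by default, or "s_a"/"s_b" when PREFIXED_PIVOT_COLUMNS is set; with
    # IDENTIFY_PIVOT_STRINGS the quoted value's SQL text is used instead of its
    # alias_or_name.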
[agg.alias for agg in aggregations] 2534 2535 def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]: 2536 if not skip_where_token and not self._match(TokenType.WHERE): 2537 return None 2538 2539 return self.expression( 2540 exp.Where, comments=self._prev_comments, this=self._parse_conjunction() 2541 ) 2542 2543 def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]: 2544 if not skip_group_by_token and not self._match(TokenType.GROUP_BY): 2545 return None 2546 2547 elements = defaultdict(list) 2548 2549 while True: 2550 expressions = self._parse_csv(self._parse_conjunction) 2551 if expressions: 2552 elements["expressions"].extend(expressions) 2553 2554 grouping_sets = self._parse_grouping_sets() 2555 if grouping_sets: 2556 elements["grouping_sets"].extend(grouping_sets) 2557 2558 rollup = None 2559 cube = None 2560 totals = None 2561 2562 with_ = self._match(TokenType.WITH) 2563 if self._match(TokenType.ROLLUP): 2564 rollup = with_ or self._parse_wrapped_csv(self._parse_column) 2565 elements["rollup"].extend(ensure_list(rollup)) 2566 2567 if self._match(TokenType.CUBE): 2568 cube = with_ or self._parse_wrapped_csv(self._parse_column) 2569 elements["cube"].extend(ensure_list(cube)) 2570 2571 if self._match_text_seq("TOTALS"): 2572 totals = True 2573 elements["totals"] = True # type: ignore 2574 2575 if not (grouping_sets or rollup or cube or totals): 2576 break 2577 2578 return self.expression(exp.Group, **elements) # type: ignore 2579 2580 def _parse_grouping_sets(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]: 2581 if not self._match(TokenType.GROUPING_SETS): 2582 return None 2583 2584 return self._parse_wrapped_csv(self._parse_grouping_set) 2585 2586 def _parse_grouping_set(self) -> t.Optional[exp.Expression]: 2587 if self._match(TokenType.L_PAREN): 2588 grouping_set = self._parse_csv(self._parse_column) 2589 self._match_r_paren() 2590 return self.expression(exp.Tuple, expressions=grouping_set) 2591 2592 return self._parse_column() 2593 2594 def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]: 2595 if not skip_having_token and not self._match(TokenType.HAVING): 2596 return None 2597 return self.expression(exp.Having, this=self._parse_conjunction()) 2598 2599 def _parse_qualify(self) -> t.Optional[exp.Qualify]: 2600 if not self._match(TokenType.QUALIFY): 2601 return None 2602 return self.expression(exp.Qualify, this=self._parse_conjunction()) 2603 2604 def _parse_order( 2605 self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False 2606 ) -> t.Optional[exp.Expression]: 2607 if not skip_order_token and not self._match(TokenType.ORDER_BY): 2608 return this 2609 2610 return self.expression( 2611 exp.Order, this=this, expressions=self._parse_csv(self._parse_ordered) 2612 ) 2613 2614 def _parse_sort(self, exp_class: t.Type[E], *texts: str) -> t.Optional[E]: 2615 if not self._match_text_seq(*texts): 2616 return None 2617 return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered)) 2618 2619 def _parse_ordered(self) -> exp.Ordered: 2620 this = self._parse_conjunction() 2621 self._match(TokenType.ASC) 2622 2623 is_desc = self._match(TokenType.DESC) 2624 is_nulls_first = self._match_text_seq("NULLS", "FIRST") 2625 is_nulls_last = self._match_text_seq("NULLS", "LAST") 2626 desc = is_desc or False 2627 asc = not desc 2628 nulls_first = is_nulls_first or False 2629 explicitly_null_ordered = is_nulls_first or is_nulls_last 2630 2631 if ( 2632 not 
explicitly_null_ordered 2633 and ( 2634 (asc and self.NULL_ORDERING == "nulls_are_small") 2635 or (desc and self.NULL_ORDERING != "nulls_are_small") 2636 ) 2637 and self.NULL_ORDERING != "nulls_are_last" 2638 ): 2639 nulls_first = True 2640 2641 return self.expression(exp.Ordered, this=this, desc=desc, nulls_first=nulls_first) 2642 2643 def _parse_limit( 2644 self, this: t.Optional[exp.Expression] = None, top: bool = False 2645 ) -> t.Optional[exp.Expression]: 2646 if self._match(TokenType.TOP if top else TokenType.LIMIT): 2647 limit_paren = self._match(TokenType.L_PAREN) 2648 expression = self._parse_number() if top else self._parse_term() 2649 2650 if self._match(TokenType.COMMA): 2651 offset = expression 2652 expression = self._parse_term() 2653 else: 2654 offset = None 2655 2656 limit_exp = self.expression(exp.Limit, this=this, expression=expression, offset=offset) 2657 2658 if limit_paren: 2659 self._match_r_paren() 2660 2661 return limit_exp 2662 2663 if self._match(TokenType.FETCH): 2664 direction = self._match_set((TokenType.FIRST, TokenType.NEXT)) 2665 direction = self._prev.text if direction else "FIRST" 2666 2667 count = self._parse_number() 2668 percent = self._match(TokenType.PERCENT) 2669 2670 self._match_set((TokenType.ROW, TokenType.ROWS)) 2671 2672 only = self._match_text_seq("ONLY") 2673 with_ties = self._match_text_seq("WITH", "TIES") 2674 2675 if only and with_ties: 2676 self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause") 2677 2678 return self.expression( 2679 exp.Fetch, 2680 direction=direction, 2681 count=count, 2682 percent=percent, 2683 with_ties=with_ties, 2684 ) 2685 2686 return this 2687 2688 def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 2689 if not self._match(TokenType.OFFSET): 2690 return this 2691 2692 count = self._parse_number() 2693 self._match_set((TokenType.ROW, TokenType.ROWS)) 2694 return self.expression(exp.Offset, this=this, expression=count) 2695 2696 def _parse_locks(self) -> t.List[exp.Lock]: 2697 locks = [] 2698 while True: 2699 if self._match_text_seq("FOR", "UPDATE"): 2700 update = True 2701 elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq( 2702 "LOCK", "IN", "SHARE", "MODE" 2703 ): 2704 update = False 2705 else: 2706 break 2707 2708 expressions = None 2709 if self._match_text_seq("OF"): 2710 expressions = self._parse_csv(lambda: self._parse_table(schema=True)) 2711 2712 wait: t.Optional[bool | exp.Expression] = None 2713 if self._match_text_seq("NOWAIT"): 2714 wait = True 2715 elif self._match_text_seq("WAIT"): 2716 wait = self._parse_primary() 2717 elif self._match_text_seq("SKIP", "LOCKED"): 2718 wait = False 2719 2720 locks.append( 2721 self.expression(exp.Lock, update=update, expressions=expressions, wait=wait) 2722 ) 2723 2724 return locks 2725 2726 def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 2727 if not self._match_set(self.SET_OPERATIONS): 2728 return this 2729 2730 token_type = self._prev.token_type 2731 2732 if token_type == TokenType.UNION: 2733 expression = exp.Union 2734 elif token_type == TokenType.EXCEPT: 2735 expression = exp.Except 2736 else: 2737 expression = exp.Intersect 2738 2739 return self.expression( 2740 expression, 2741 this=this, 2742 distinct=self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL), 2743 expression=self._parse_set_operations(self._parse_select(nested=True)), 2744 ) 2745 2746 def _parse_expression(self) -> t.Optional[exp.Expression]: 2747 return 
self._parse_alias(self._parse_conjunction()) 2748 2749 def _parse_conjunction(self) -> t.Optional[exp.Expression]: 2750 return self._parse_tokens(self._parse_equality, self.CONJUNCTION) 2751 2752 def _parse_equality(self) -> t.Optional[exp.Expression]: 2753 return self._parse_tokens(self._parse_comparison, self.EQUALITY) 2754 2755 def _parse_comparison(self) -> t.Optional[exp.Expression]: 2756 return self._parse_tokens(self._parse_range, self.COMPARISON) 2757 2758 def _parse_range(self) -> t.Optional[exp.Expression]: 2759 this = self._parse_bitwise() 2760 negate = self._match(TokenType.NOT) 2761 2762 if self._match_set(self.RANGE_PARSERS): 2763 expression = self.RANGE_PARSERS[self._prev.token_type](self, this) 2764 if not expression: 2765 return this 2766 2767 this = expression 2768 elif self._match(TokenType.ISNULL): 2769 this = self.expression(exp.Is, this=this, expression=exp.Null()) 2770 2771 # Postgres supports ISNULL and NOTNULL for conditions. 2772 # https://blog.andreiavram.ro/postgresql-null-composite-type/ 2773 if self._match(TokenType.NOTNULL): 2774 this = self.expression(exp.Is, this=this, expression=exp.Null()) 2775 this = self.expression(exp.Not, this=this) 2776 2777 if negate: 2778 this = self.expression(exp.Not, this=this) 2779 2780 if self._match(TokenType.IS): 2781 this = self._parse_is(this) 2782 2783 return this 2784 2785 def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 2786 index = self._index - 1 2787 negate = self._match(TokenType.NOT) 2788 2789 if self._match_text_seq("DISTINCT", "FROM"): 2790 klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ 2791 return self.expression(klass, this=this, expression=self._parse_expression()) 2792 2793 expression = self._parse_null() or self._parse_boolean() 2794 if not expression: 2795 self._retreat(index) 2796 return None 2797 2798 this = self.expression(exp.Is, this=this, expression=expression) 2799 return self.expression(exp.Not, this=this) if negate else this 2800 2801 def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In: 2802 unnest = self._parse_unnest(with_alias=False) 2803 if unnest: 2804 this = self.expression(exp.In, this=this, unnest=unnest) 2805 elif self._match(TokenType.L_PAREN): 2806 expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias)) 2807 2808 if len(expressions) == 1 and isinstance(expressions[0], exp.Subqueryable): 2809 this = self.expression(exp.In, this=this, query=expressions[0]) 2810 else: 2811 this = self.expression(exp.In, this=this, expressions=expressions) 2812 2813 self._match_r_paren(this) 2814 else: 2815 this = self.expression(exp.In, this=this, field=self._parse_field()) 2816 2817 return this 2818 2819 def _parse_between(self, this: exp.Expression) -> exp.Between: 2820 low = self._parse_bitwise() 2821 self._match(TokenType.AND) 2822 high = self._parse_bitwise() 2823 return self.expression(exp.Between, this=this, low=low, high=high) 2824 2825 def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 2826 if not self._match(TokenType.ESCAPE): 2827 return this 2828 return self.expression(exp.Escape, this=this, expression=self._parse_string()) 2829 2830 def _parse_interval(self) -> t.Optional[exp.Interval]: 2831 if not self._match(TokenType.INTERVAL): 2832 return None 2833 2834 this = self._parse_primary() or self._parse_term() 2835 unit = self._parse_function() or self._parse_var() 2836 2837 # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse 2838 
# each INTERVAL expression into this canonical form so it's easy to transpile 2839 if this and this.is_number: 2840 this = exp.Literal.string(this.name) 2841 elif this and this.is_string: 2842 parts = this.name.split() 2843 2844 if len(parts) == 2: 2845 if unit: 2846 # this is not actually a unit, it's something else 2847 unit = None 2848 self._retreat(self._index - 1) 2849 else: 2850 this = exp.Literal.string(parts[0]) 2851 unit = self.expression(exp.Var, this=parts[1]) 2852 2853 return self.expression(exp.Interval, this=this, unit=unit) 2854 2855 def _parse_bitwise(self) -> t.Optional[exp.Expression]: 2856 this = self._parse_term() 2857 2858 while True: 2859 if self._match_set(self.BITWISE): 2860 this = self.expression( 2861 self.BITWISE[self._prev.token_type], this=this, expression=self._parse_term() 2862 ) 2863 elif self._match_pair(TokenType.LT, TokenType.LT): 2864 this = self.expression( 2865 exp.BitwiseLeftShift, this=this, expression=self._parse_term() 2866 ) 2867 elif self._match_pair(TokenType.GT, TokenType.GT): 2868 this = self.expression( 2869 exp.BitwiseRightShift, this=this, expression=self._parse_term() 2870 ) 2871 else: 2872 break 2873 2874 return this 2875 2876 def _parse_term(self) -> t.Optional[exp.Expression]: 2877 return self._parse_tokens(self._parse_factor, self.TERM) 2878 2879 def _parse_factor(self) -> t.Optional[exp.Expression]: 2880 return self._parse_tokens(self._parse_unary, self.FACTOR) 2881 2882 def _parse_unary(self) -> t.Optional[exp.Expression]: 2883 if self._match_set(self.UNARY_PARSERS): 2884 return self.UNARY_PARSERS[self._prev.token_type](self) 2885 return self._parse_at_time_zone(self._parse_type()) 2886 2887 def _parse_type(self) -> t.Optional[exp.Expression]: 2888 interval = self._parse_interval() 2889 if interval: 2890 return interval 2891 2892 index = self._index 2893 data_type = self._parse_types(check_func=True) 2894 this = self._parse_column() 2895 2896 if data_type: 2897 if isinstance(this, exp.Literal): 2898 parser = self.TYPE_LITERAL_PARSERS.get(data_type.this) 2899 if parser: 2900 return parser(self, this, data_type) 2901 return self.expression(exp.Cast, this=this, to=data_type) 2902 if not data_type.expressions: 2903 self._retreat(index) 2904 return self._parse_column() 2905 return self._parse_column_ops(data_type) 2906 2907 return this 2908 2909 def _parse_type_size(self) -> t.Optional[exp.DataTypeSize]: 2910 this = self._parse_type() 2911 if not this: 2912 return None 2913 2914 return self.expression( 2915 exp.DataTypeSize, this=this, expression=self._parse_var(any_token=True) 2916 ) 2917 2918 def _parse_types( 2919 self, check_func: bool = False, schema: bool = False 2920 ) -> t.Optional[exp.Expression]: 2921 index = self._index 2922 2923 prefix = self._match_text_seq("SYSUDTLIB", ".") 2924 2925 if not self._match_set(self.TYPE_TOKENS): 2926 return None 2927 2928 type_token = self._prev.token_type 2929 2930 if type_token == TokenType.PSEUDO_TYPE: 2931 return self.expression(exp.PseudoType, this=self._prev.text) 2932 2933 nested = type_token in self.NESTED_TYPE_TOKENS 2934 is_struct = type_token == TokenType.STRUCT 2935 expressions = None 2936 maybe_func = False 2937 2938 if self._match(TokenType.L_PAREN): 2939 if is_struct: 2940 expressions = self._parse_csv(self._parse_struct_types) 2941 elif nested: 2942 expressions = self._parse_csv( 2943 lambda: self._parse_types(check_func=check_func, schema=schema) 2944 ) 2945 elif type_token in self.ENUM_TYPE_TOKENS: 2946 expressions = self._parse_csv(self._parse_primary) 2947 else: 2948 
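# for non-nested, non-enum types, parenthesized arguments are sizes/precisions, e.g. DECIMAL(10, 2) or VARCHAR(255)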
expressions = self._parse_csv(self._parse_type_size) 2949 2950 if not expressions or not self._match(TokenType.R_PAREN): 2951 self._retreat(index) 2952 return None 2953 2954 maybe_func = True 2955 2956 if self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET): 2957 this = exp.DataType( 2958 this=exp.DataType.Type.ARRAY, 2959 expressions=[exp.DataType.build(type_token.value, expressions=expressions)], 2960 nested=True, 2961 ) 2962 2963 while self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET): 2964 this = exp.DataType(this=exp.DataType.Type.ARRAY, expressions=[this], nested=True) 2965 2966 return this 2967 2968 if self._match(TokenType.L_BRACKET): 2969 self._retreat(index) 2970 return None 2971 2972 values: t.Optional[t.List[t.Optional[exp.Expression]]] = None 2973 if nested and self._match(TokenType.LT): 2974 if is_struct: 2975 expressions = self._parse_csv(self._parse_struct_types) 2976 else: 2977 expressions = self._parse_csv( 2978 lambda: self._parse_types(check_func=check_func, schema=schema) 2979 ) 2980 2981 if not self._match(TokenType.GT): 2982 self.raise_error("Expecting >") 2983 2984 if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)): 2985 values = self._parse_csv(self._parse_conjunction) 2986 self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN)) 2987 2988 value: t.Optional[exp.Expression] = None 2989 if type_token in self.TIMESTAMPS: 2990 if self._match_text_seq("WITH", "TIME", "ZONE") or type_token == TokenType.TIMESTAMPTZ: 2991 value = exp.DataType(this=exp.DataType.Type.TIMESTAMPTZ, expressions=expressions) 2992 elif ( 2993 self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE") 2994 or type_token == TokenType.TIMESTAMPLTZ 2995 ): 2996 value = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions) 2997 elif self._match_text_seq("WITHOUT", "TIME", "ZONE"): 2998 if type_token == TokenType.TIME: 2999 value = exp.DataType(this=exp.DataType.Type.TIME, expressions=expressions) 3000 else: 3001 value = exp.DataType(this=exp.DataType.Type.TIMESTAMP, expressions=expressions) 3002 3003 maybe_func = maybe_func and value is None 3004 3005 if value is None: 3006 value = exp.DataType(this=exp.DataType.Type.TIMESTAMP, expressions=expressions) 3007 elif type_token == TokenType.INTERVAL: 3008 unit = self._parse_var() 3009 3010 if not unit: 3011 value = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL) 3012 else: 3013 value = self.expression(exp.Interval, unit=unit) 3014 3015 if maybe_func and check_func: 3016 index2 = self._index 3017 peek = self._parse_string() 3018 3019 if not peek: 3020 self._retreat(index) 3021 return None 3022 3023 self._retreat(index2) 3024 3025 if value: 3026 return value 3027 3028 return exp.DataType( 3029 this=exp.DataType.Type[type_token.value.upper()], 3030 expressions=expressions, 3031 nested=nested, 3032 values=values, 3033 prefix=prefix, 3034 ) 3035 3036 def _parse_struct_types(self) -> t.Optional[exp.Expression]: 3037 this = self._parse_type() or self._parse_id_var() 3038 self._match(TokenType.COLON) 3039 return self._parse_column_def(this) 3040 3041 def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3042 if not self._match_text_seq("AT", "TIME", "ZONE"): 3043 return this 3044 return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary()) 3045 3046 def _parse_column(self) -> t.Optional[exp.Expression]: 3047 this = self._parse_field() 3048 if isinstance(this, exp.Identifier): 3049 this = self.expression(exp.Column, this=this) 3050 elif not this: 3051 
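# no field was parsed; a bracket may still introduce a value here, e.g. a standalone array literal such as [1, 2]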
return self._parse_bracket(this) 3052 return self._parse_column_ops(this) 3053 3054 def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3055 this = self._parse_bracket(this) 3056 3057 while self._match_set(self.COLUMN_OPERATORS): 3058 op_token = self._prev.token_type 3059 op = self.COLUMN_OPERATORS.get(op_token) 3060 3061 if op_token == TokenType.DCOLON: 3062 field = self._parse_types() 3063 if not field: 3064 self.raise_error("Expected type") 3065 elif op and self._curr: 3066 self._advance() 3067 value = self._prev.text 3068 field = ( 3069 exp.Literal.number(value) 3070 if self._prev.token_type == TokenType.NUMBER 3071 else exp.Literal.string(value) 3072 ) 3073 else: 3074 field = self._parse_field(anonymous_func=True, any_token=True) 3075 3076 if isinstance(field, exp.Func): 3077 # bigquery allows function calls like x.y.count(...) 3078 # SAFE.SUBSTR(...) 3079 # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules 3080 this = self._replace_columns_with_dots(this) 3081 3082 if op: 3083 this = op(self, this, field) 3084 elif isinstance(this, exp.Column) and not this.args.get("catalog"): 3085 this = self.expression( 3086 exp.Column, 3087 this=field, 3088 table=this.this, 3089 db=this.args.get("table"), 3090 catalog=this.args.get("db"), 3091 ) 3092 else: 3093 this = self.expression(exp.Dot, this=this, expression=field) 3094 this = self._parse_bracket(this) 3095 return this 3096 3097 def _parse_primary(self) -> t.Optional[exp.Expression]: 3098 if self._match_set(self.PRIMARY_PARSERS): 3099 token_type = self._prev.token_type 3100 primary = self.PRIMARY_PARSERS[token_type](self, self._prev) 3101 3102 if token_type == TokenType.STRING: 3103 expressions = [primary] 3104 while self._match(TokenType.STRING): 3105 expressions.append(exp.Literal.string(self._prev.text)) 3106 3107 if len(expressions) > 1: 3108 return self.expression(exp.Concat, expressions=expressions) 3109 3110 return primary 3111 3112 if self._match_pair(TokenType.DOT, TokenType.NUMBER): 3113 return exp.Literal.number(f"0.{self._prev.text}") 3114 3115 if self._match(TokenType.L_PAREN): 3116 comments = self._prev_comments 3117 query = self._parse_select() 3118 3119 if query: 3120 expressions = [query] 3121 else: 3122 expressions = self._parse_csv(self._parse_expression) 3123 3124 this = self._parse_query_modifiers(seq_get(expressions, 0)) 3125 3126 if isinstance(this, exp.Subqueryable): 3127 this = self._parse_set_operations( 3128 self._parse_subquery(this=this, parse_alias=False) 3129 ) 3130 elif len(expressions) > 1: 3131 this = self.expression(exp.Tuple, expressions=expressions) 3132 else: 3133 this = self.expression(exp.Paren, this=self._parse_set_operations(this)) 3134 3135 if this: 3136 this.add_comments(comments) 3137 3138 self._match_r_paren(expression=this) 3139 return this 3140 3141 return None 3142 3143 def _parse_field( 3144 self, 3145 any_token: bool = False, 3146 tokens: t.Optional[t.Collection[TokenType]] = None, 3147 anonymous_func: bool = False, 3148 ) -> t.Optional[exp.Expression]: 3149 return ( 3150 self._parse_primary() 3151 or self._parse_function(anonymous=anonymous_func) 3152 or self._parse_id_var(any_token=any_token, tokens=tokens) 3153 ) 3154 3155 def _parse_function( 3156 self, 3157 functions: t.Optional[t.Dict[str, t.Callable]] = None, 3158 anonymous: bool = False, 3159 optional_parens: bool = True, 3160 ) -> t.Optional[exp.Expression]: 3161 if not self._curr: 3162 return None 3163 3164 token_type = 
self._curr.token_type 3165 3166 if optional_parens and self._match_set(self.NO_PAREN_FUNCTION_PARSERS): 3167 return self.NO_PAREN_FUNCTION_PARSERS[token_type](self) 3168 3169 if not self._next or self._next.token_type != TokenType.L_PAREN: 3170 if optional_parens and token_type in self.NO_PAREN_FUNCTIONS: 3171 self._advance() 3172 return self.expression(self.NO_PAREN_FUNCTIONS[token_type]) 3173 3174 return None 3175 3176 if token_type not in self.FUNC_TOKENS: 3177 return None 3178 3179 this = self._curr.text 3180 upper = this.upper() 3181 self._advance(2) 3182 3183 parser = self.FUNCTION_PARSERS.get(upper) 3184 3185 if parser and not anonymous: 3186 this = parser(self) 3187 else: 3188 subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type) 3189 3190 if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH): 3191 this = self.expression(subquery_predicate, this=self._parse_select()) 3192 self._match_r_paren() 3193 return this 3194 3195 if functions is None: 3196 functions = self.FUNCTIONS 3197 3198 function = functions.get(upper) 3199 3200 alias = upper in self.FUNCTIONS_WITH_ALIASED_ARGS 3201 args = self._parse_csv(lambda: self._parse_lambda(alias=alias)) 3202 3203 if function and not anonymous: 3204 this = self.validate_expression(function(args), args) 3205 else: 3206 this = self.expression(exp.Anonymous, this=this, expressions=args) 3207 3208 self._match_r_paren(this) 3209 return self._parse_window(this) 3210 3211 def _parse_function_parameter(self) -> t.Optional[exp.Expression]: 3212 return self._parse_column_def(self._parse_id_var()) 3213 3214 def _parse_user_defined_function( 3215 self, kind: t.Optional[TokenType] = None 3216 ) -> t.Optional[exp.Expression]: 3217 this = self._parse_id_var() 3218 3219 while self._match(TokenType.DOT): 3220 this = self.expression(exp.Dot, this=this, expression=self._parse_id_var()) 3221 3222 if not self._match(TokenType.L_PAREN): 3223 return this 3224 3225 expressions = self._parse_csv(self._parse_function_parameter) 3226 self._match_r_paren() 3227 return self.expression( 3228 exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True 3229 ) 3230 3231 def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier: 3232 literal = self._parse_primary() 3233 if literal: 3234 return self.expression(exp.Introducer, this=token.text, expression=literal) 3235 3236 return self.expression(exp.Identifier, this=token.text) 3237 3238 def _parse_session_parameter(self) -> exp.SessionParameter: 3239 kind = None 3240 this = self._parse_id_var() or self._parse_primary() 3241 3242 if this and self._match(TokenType.DOT): 3243 kind = this.name 3244 this = self._parse_var() or self._parse_primary() 3245 3246 return self.expression(exp.SessionParameter, this=this, kind=kind) 3247 3248 def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]: 3249 index = self._index 3250 3251 if self._match(TokenType.L_PAREN): 3252 expressions = self._parse_csv(self._parse_id_var) 3253 3254 if not self._match(TokenType.R_PAREN): 3255 self._retreat(index) 3256 else: 3257 expressions = [self._parse_id_var()] 3258 3259 if self._match_set(self.LAMBDAS): 3260 return self.LAMBDAS[self._prev.token_type](self, expressions) 3261 3262 self._retreat(index) 3263 3264 this: t.Optional[exp.Expression] 3265 3266 if self._match(TokenType.DISTINCT): 3267 this = self.expression( 3268 exp.Distinct, expressions=self._parse_csv(self._parse_conjunction) 3269 ) 3270 else: 3271 this = self._parse_select_or_expression(alias=alias) 3272 
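# an EQ at the top level of a function argument is typically a `name = value` style parameter, so its left side is rewritten into a plain variable to avoid qualifying it as a column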
3273 if isinstance(this, exp.EQ): 3274 left = this.this 3275 if isinstance(left, exp.Column): 3276 left.replace(exp.var(left.text("this"))) 3277 3278 return self._parse_limit(self._parse_order(self._parse_respect_or_ignore_nulls(this))) 3279 3280 def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 3281 index = self._index 3282 3283 if not self.errors: 3284 try: 3285 if self._parse_select(nested=True): 3286 return this 3287 except ParseError: 3288 pass 3289 finally: 3290 self.errors.clear() 3291 self._retreat(index) 3292 3293 if not self._match(TokenType.L_PAREN): 3294 return this 3295 3296 args = self._parse_csv( 3297 lambda: self._parse_constraint() 3298 or self._parse_column_def(self._parse_field(any_token=True)) 3299 ) 3300 3301 self._match_r_paren() 3302 return self.expression(exp.Schema, this=this, expressions=args) 3303 3304 def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3305 # column defs are not really columns, they're identifiers 3306 if isinstance(this, exp.Column): 3307 this = this.this 3308 3309 kind = self._parse_types(schema=True) 3310 3311 if self._match_text_seq("FOR", "ORDINALITY"): 3312 return self.expression(exp.ColumnDef, this=this, ordinality=True) 3313 3314 constraints = [] 3315 while True: 3316 constraint = self._parse_column_constraint() 3317 if not constraint: 3318 break 3319 constraints.append(constraint) 3320 3321 if not kind and not constraints: 3322 return this 3323 3324 return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints) 3325 3326 def _parse_auto_increment( 3327 self, 3328 ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint: 3329 start = None 3330 increment = None 3331 3332 if self._match(TokenType.L_PAREN, advance=False): 3333 args = self._parse_wrapped_csv(self._parse_bitwise) 3334 start = seq_get(args, 0) 3335 increment = seq_get(args, 1) 3336 elif self._match_text_seq("START"): 3337 start = self._parse_bitwise() 3338 self._match_text_seq("INCREMENT") 3339 increment = self._parse_bitwise() 3340 3341 if start and increment: 3342 return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment) 3343 3344 return exp.AutoIncrementColumnConstraint() 3345 3346 def _parse_compress(self) -> exp.CompressColumnConstraint: 3347 if self._match(TokenType.L_PAREN, advance=False): 3348 return self.expression( 3349 exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise) 3350 ) 3351 3352 return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise()) 3353 3354 def _parse_generated_as_identity(self) -> exp.GeneratedAsIdentityColumnConstraint: 3355 if self._match_text_seq("BY", "DEFAULT"): 3356 on_null = self._match_pair(TokenType.ON, TokenType.NULL) 3357 this = self.expression( 3358 exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null 3359 ) 3360 else: 3361 self._match_text_seq("ALWAYS") 3362 this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True) 3363 3364 self._match(TokenType.ALIAS) 3365 identity = self._match_text_seq("IDENTITY") 3366 3367 if self._match(TokenType.L_PAREN): 3368 if self._match_text_seq("START", "WITH"): 3369 this.set("start", self._parse_bitwise()) 3370 if self._match_text_seq("INCREMENT", "BY"): 3371 this.set("increment", self._parse_bitwise()) 3372 if self._match_text_seq("MINVALUE"): 3373 this.set("minvalue", self._parse_bitwise()) 3374 if self._match_text_seq("MAXVALUE"): 3375 this.set("maxvalue", 
self._parse_bitwise()) 3376 3377 if self._match_text_seq("CYCLE"): 3378 this.set("cycle", True) 3379 elif self._match_text_seq("NO", "CYCLE"): 3380 this.set("cycle", False) 3381 3382 if not identity: 3383 this.set("expression", self._parse_bitwise()) 3384 3385 self._match_r_paren() 3386 3387 return this 3388 3389 def _parse_inline(self) -> exp.InlineLengthColumnConstraint: 3390 self._match_text_seq("LENGTH") 3391 return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise()) 3392 3393 def _parse_not_constraint( 3394 self, 3395 ) -> t.Optional[exp.NotNullColumnConstraint | exp.CaseSpecificColumnConstraint]: 3396 if self._match_text_seq("NULL"): 3397 return self.expression(exp.NotNullColumnConstraint) 3398 if self._match_text_seq("CASESPECIFIC"): 3399 return self.expression(exp.CaseSpecificColumnConstraint, not_=True) 3400 return None 3401 3402 def _parse_column_constraint(self) -> t.Optional[exp.Expression]: 3403 if self._match(TokenType.CONSTRAINT): 3404 this = self._parse_id_var() 3405 else: 3406 this = None 3407 3408 if self._match_texts(self.CONSTRAINT_PARSERS): 3409 return self.expression( 3410 exp.ColumnConstraint, 3411 this=this, 3412 kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self), 3413 ) 3414 3415 return this 3416 3417 def _parse_constraint(self) -> t.Optional[exp.Expression]: 3418 if not self._match(TokenType.CONSTRAINT): 3419 return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS) 3420 3421 this = self._parse_id_var() 3422 expressions = [] 3423 3424 while True: 3425 constraint = self._parse_unnamed_constraint() or self._parse_function() 3426 if not constraint: 3427 break 3428 expressions.append(constraint) 3429 3430 return self.expression(exp.Constraint, this=this, expressions=expressions) 3431 3432 def _parse_unnamed_constraint( 3433 self, constraints: t.Optional[t.Collection[str]] = None 3434 ) -> t.Optional[exp.Expression]: 3435 if not self._match_texts(constraints or self.CONSTRAINT_PARSERS): 3436 return None 3437 3438 constraint = self._prev.text.upper() 3439 if constraint not in self.CONSTRAINT_PARSERS: 3440 self.raise_error(f"No parser found for schema constraint {constraint}.") 3441 3442 return self.CONSTRAINT_PARSERS[constraint](self) 3443 3444 def _parse_unique(self) -> exp.UniqueColumnConstraint: 3445 self._match_text_seq("KEY") 3446 return self.expression( 3447 exp.UniqueColumnConstraint, this=self._parse_schema(self._parse_id_var(any_token=False)) 3448 ) 3449 3450 def _parse_key_constraint_options(self) -> t.List[str]: 3451 options = [] 3452 while True: 3453 if not self._curr: 3454 break 3455 3456 if self._match(TokenType.ON): 3457 action = None 3458 on = self._advance_any() and self._prev.text 3459 3460 if self._match_text_seq("NO", "ACTION"): 3461 action = "NO ACTION" 3462 elif self._match_text_seq("CASCADE"): 3463 action = "CASCADE" 3464 elif self._match_pair(TokenType.SET, TokenType.NULL): 3465 action = "SET NULL" 3466 elif self._match_pair(TokenType.SET, TokenType.DEFAULT): 3467 action = "SET DEFAULT" 3468 else: 3469 self.raise_error("Invalid key constraint") 3470 3471 options.append(f"ON {on} {action}") 3472 elif self._match_text_seq("NOT", "ENFORCED"): 3473 options.append("NOT ENFORCED") 3474 elif self._match_text_seq("DEFERRABLE"): 3475 options.append("DEFERRABLE") 3476 elif self._match_text_seq("INITIALLY", "DEFERRED"): 3477 options.append("INITIALLY DEFERRED") 3478 elif self._match_text_seq("NORELY"): 3479 options.append("NORELY") 3480 elif self._match_text_seq("MATCH", "FULL"): 3481 
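# SQL-standard foreign key matching, as in REFERENCES t (a, b) MATCH FULL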
options.append("MATCH FULL") 3482 else: 3483 break 3484 3485 return options 3486 3487 def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]: 3488 if match and not self._match(TokenType.REFERENCES): 3489 return None 3490 3491 expressions = None 3492 this = self._parse_id_var() 3493 3494 if self._match(TokenType.L_PAREN, advance=False): 3495 expressions = self._parse_wrapped_id_vars() 3496 3497 options = self._parse_key_constraint_options() 3498 return self.expression(exp.Reference, this=this, expressions=expressions, options=options) 3499 3500 def _parse_foreign_key(self) -> exp.ForeignKey: 3501 expressions = self._parse_wrapped_id_vars() 3502 reference = self._parse_references() 3503 options = {} 3504 3505 while self._match(TokenType.ON): 3506 if not self._match_set((TokenType.DELETE, TokenType.UPDATE)): 3507 self.raise_error("Expected DELETE or UPDATE") 3508 3509 kind = self._prev.text.lower() 3510 3511 if self._match_text_seq("NO", "ACTION"): 3512 action = "NO ACTION" 3513 elif self._match(TokenType.SET): 3514 self._match_set((TokenType.NULL, TokenType.DEFAULT)) 3515 action = "SET " + self._prev.text.upper() 3516 else: 3517 self._advance() 3518 action = self._prev.text.upper() 3519 3520 options[kind] = action 3521 3522 return self.expression( 3523 exp.ForeignKey, expressions=expressions, reference=reference, **options # type: ignore 3524 ) 3525 3526 def _parse_primary_key( 3527 self, wrapped_optional: bool = False, in_props: bool = False 3528 ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey: 3529 desc = ( 3530 self._match_set((TokenType.ASC, TokenType.DESC)) 3531 and self._prev.token_type == TokenType.DESC 3532 ) 3533 3534 if not in_props and not self._match(TokenType.L_PAREN, advance=False): 3535 return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc) 3536 3537 expressions = self._parse_wrapped_csv(self._parse_field, optional=wrapped_optional) 3538 options = self._parse_key_constraint_options() 3539 return self.expression(exp.PrimaryKey, expressions=expressions, options=options) 3540 3541 def _parse_bracket(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3542 if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)): 3543 return this 3544 3545 bracket_kind = self._prev.token_type 3546 3547 if self._match(TokenType.COLON): 3548 expressions: t.List[t.Optional[exp.Expression]] = [ 3549 self.expression(exp.Slice, expression=self._parse_conjunction()) 3550 ] 3551 else: 3552 expressions = self._parse_csv(lambda: self._parse_slice(self._parse_conjunction())) 3553 3554 # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs 3555 if bracket_kind == TokenType.L_BRACE: 3556 this = self.expression(exp.Struct, expressions=expressions) 3557 elif not this or this.name.upper() == "ARRAY": 3558 this = self.expression(exp.Array, expressions=expressions) 3559 else: 3560 expressions = apply_index_offset(this, expressions, -self.INDEX_OFFSET) 3561 this = self.expression(exp.Bracket, this=this, expressions=expressions) 3562 3563 if not self._match(TokenType.R_BRACKET) and bracket_kind == TokenType.L_BRACKET: 3564 self.raise_error("Expected ]") 3565 elif not self._match(TokenType.R_BRACE) and bracket_kind == TokenType.L_BRACE: 3566 self.raise_error("Expected }") 3567 3568 self._add_comments(this) 3569 return self._parse_bracket(this) 3570 3571 def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3572 if self._match(TokenType.COLON): 3573 return self.expression(exp.Slice, this=this, 
expression=self._parse_conjunction()) 3574 return this 3575 3576 def _parse_case(self) -> t.Optional[exp.Expression]: 3577 ifs = [] 3578 default = None 3579 3580 expression = self._parse_conjunction() 3581 3582 while self._match(TokenType.WHEN): 3583 this = self._parse_conjunction() 3584 self._match(TokenType.THEN) 3585 then = self._parse_conjunction() 3586 ifs.append(self.expression(exp.If, this=this, true=then)) 3587 3588 if self._match(TokenType.ELSE): 3589 default = self._parse_conjunction() 3590 3591 if not self._match(TokenType.END): 3592 self.raise_error("Expected END after CASE", self._prev) 3593 3594 return self._parse_window( 3595 self.expression(exp.Case, this=expression, ifs=ifs, default=default) 3596 ) 3597 3598 def _parse_if(self) -> t.Optional[exp.Expression]: 3599 if self._match(TokenType.L_PAREN): 3600 args = self._parse_csv(self._parse_conjunction) 3601 this = self.validate_expression(exp.If.from_arg_list(args), args) 3602 self._match_r_paren() 3603 else: 3604 index = self._index - 1 3605 condition = self._parse_conjunction() 3606 3607 if not condition: 3608 self._retreat(index) 3609 return None 3610 3611 self._match(TokenType.THEN) 3612 true = self._parse_conjunction() 3613 false = self._parse_conjunction() if self._match(TokenType.ELSE) else None 3614 self._match(TokenType.END) 3615 this = self.expression(exp.If, this=condition, true=true, false=false) 3616 3617 return self._parse_window(this) 3618 3619 def _parse_extract(self) -> exp.Extract: 3620 this = self._parse_function() or self._parse_var() or self._parse_type() 3621 3622 if self._match(TokenType.FROM): 3623 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 3624 3625 if not self._match(TokenType.COMMA): 3626 self.raise_error("Expected FROM or comma after EXTRACT", self._prev) 3627 3628 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 3629 3630 def _parse_cast(self, strict: bool) -> exp.Expression: 3631 this = self._parse_conjunction() 3632 3633 if not self._match(TokenType.ALIAS): 3634 if self._match(TokenType.COMMA): 3635 return self.expression( 3636 exp.CastToStrType, this=this, expression=self._parse_string() 3637 ) 3638 else: 3639 self.raise_error("Expected AS after CAST") 3640 3641 to = self._parse_types() 3642 3643 if not to: 3644 self.raise_error("Expected TYPE after CAST") 3645 elif to.this == exp.DataType.Type.CHAR: 3646 if self._match(TokenType.CHARACTER_SET): 3647 to = self.expression(exp.CharacterSet, this=self._parse_var_or_string()) 3648 elif to.this in exp.DataType.TEMPORAL_TYPES and self._match(TokenType.FORMAT): 3649 fmt = self._parse_string() 3650 3651 return self.expression( 3652 exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime, 3653 this=this, 3654 format=exp.Literal.string( 3655 format_time( 3656 fmt.this if fmt else "", 3657 self.FORMAT_MAPPING or self.TIME_MAPPING, 3658 self.FORMAT_TRIE or self.TIME_TRIE, 3659 ) 3660 ), 3661 ) 3662 3663 return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to) 3664 3665 def _parse_concat(self) -> t.Optional[exp.Expression]: 3666 args = self._parse_csv(self._parse_conjunction) 3667 if self.CONCAT_NULL_OUTPUTS_STRING: 3668 args = [exp.func("COALESCE", arg, exp.Literal.string("")) for arg in args] 3669 3670 # Some dialects (e.g. Trino) don't allow a single-argument CONCAT call, so when 3671 # we find such a call we replace it with its argument. 
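# e.g. CONCAT(x) is reduced to just x (or to COALESCE(x, '') when NULL arguments are coerced to empty strings above)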
3672 if len(args) == 1: 3673 return args[0] 3674 3675 return self.expression( 3676 exp.Concat if self.STRICT_STRING_CONCAT else exp.SafeConcat, expressions=args 3677 ) 3678 3679 def _parse_string_agg(self) -> exp.Expression: 3680 expression: t.Optional[exp.Expression] 3681 3682 if self._match(TokenType.DISTINCT): 3683 args = self._parse_csv(self._parse_conjunction) 3684 expression = self.expression(exp.Distinct, expressions=[seq_get(args, 0)]) 3685 else: 3686 args = self._parse_csv(self._parse_conjunction) 3687 expression = seq_get(args, 0) 3688 3689 index = self._index 3690 if not self._match(TokenType.R_PAREN): 3691 # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]]) 3692 order = self._parse_order(this=expression) 3693 return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1)) 3694 3695 # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]). 3696 # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that 3697 # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them. 3698 if not self._match_text_seq("WITHIN", "GROUP"): 3699 self._retreat(index) 3700 return self.validate_expression(exp.GroupConcat.from_arg_list(args), args) 3701 3702 self._match_l_paren() # The corresponding match_r_paren will be called in parse_function (caller) 3703 order = self._parse_order(this=expression) 3704 return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1)) 3705 3706 def _parse_convert(self, strict: bool) -> t.Optional[exp.Expression]: 3707 to: t.Optional[exp.Expression] 3708 this = self._parse_bitwise() 3709 3710 if self._match(TokenType.USING): 3711 to = self.expression(exp.CharacterSet, this=self._parse_var()) 3712 elif self._match(TokenType.COMMA): 3713 to = self._parse_bitwise() 3714 else: 3715 to = None 3716 3717 # Swap the argument order if needed to produce the correct AST 3718 if self.CONVERT_TYPE_FIRST: 3719 this, to = to, this 3720 3721 return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to) 3722 3723 def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]: 3724 """ 3725 There are generally two variants of the DECODE function: 3726 3727 - DECODE(bin, charset) 3728 - DECODE(expression, search, result [, search, result] ... [, default]) 3729 3730 The second variant will always be parsed into a CASE expression. Note that NULL 3731 needs special treatment, since we need to explicitly check for it with `IS NULL`, 3732 instead of relying on pattern matching. 
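For example, DECODE(x, 1, 'one', 'other') is parsed into (roughly) CASE WHEN x = 1 THEN 'one' ELSE 'other' END, while non-literal search arguments also get explicit IS NULL checks so that NULL matches NULL.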
3733 """ 3734 args = self._parse_csv(self._parse_conjunction) 3735 3736 if len(args) < 3: 3737 return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1)) 3738 3739 expression, *expressions = args 3740 if not expression: 3741 return None 3742 3743 ifs = [] 3744 for search, result in zip(expressions[::2], expressions[1::2]): 3745 if not search or not result: 3746 return None 3747 3748 if isinstance(search, exp.Literal): 3749 ifs.append( 3750 exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result) 3751 ) 3752 elif isinstance(search, exp.Null): 3753 ifs.append( 3754 exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result) 3755 ) 3756 else: 3757 cond = exp.or_( 3758 exp.EQ(this=expression.copy(), expression=search), 3759 exp.and_( 3760 exp.Is(this=expression.copy(), expression=exp.Null()), 3761 exp.Is(this=search.copy(), expression=exp.Null()), 3762 copy=False, 3763 ), 3764 copy=False, 3765 ) 3766 ifs.append(exp.If(this=cond, true=result)) 3767 3768 return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None) 3769 3770 def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]: 3771 self._match_text_seq("KEY") 3772 key = self._parse_field() 3773 self._match(TokenType.COLON) 3774 self._match_text_seq("VALUE") 3775 value = self._parse_field() 3776 3777 if not key and not value: 3778 return None 3779 return self.expression(exp.JSONKeyValue, this=key, expression=value) 3780 3781 def _parse_json_object(self) -> exp.JSONObject: 3782 star = self._parse_star() 3783 expressions = [star] if star else self._parse_csv(self._parse_json_key_value) 3784 3785 null_handling = None 3786 if self._match_text_seq("NULL", "ON", "NULL"): 3787 null_handling = "NULL ON NULL" 3788 elif self._match_text_seq("ABSENT", "ON", "NULL"): 3789 null_handling = "ABSENT ON NULL" 3790 3791 unique_keys = None 3792 if self._match_text_seq("WITH", "UNIQUE"): 3793 unique_keys = True 3794 elif self._match_text_seq("WITHOUT", "UNIQUE"): 3795 unique_keys = False 3796 3797 self._match_text_seq("KEYS") 3798 3799 return_type = self._match_text_seq("RETURNING") and self._parse_type() 3800 format_json = self._match_text_seq("FORMAT", "JSON") 3801 encoding = self._match_text_seq("ENCODING") and self._parse_var() 3802 3803 return self.expression( 3804 exp.JSONObject, 3805 expressions=expressions, 3806 null_handling=null_handling, 3807 unique_keys=unique_keys, 3808 return_type=return_type, 3809 format_json=format_json, 3810 encoding=encoding, 3811 ) 3812 3813 def _parse_logarithm(self) -> exp.Func: 3814 # Default argument order is base, expression 3815 args = self._parse_csv(self._parse_range) 3816 3817 if len(args) > 1: 3818 if not self.LOG_BASE_FIRST: 3819 args.reverse() 3820 return exp.Log.from_arg_list(args) 3821 3822 return self.expression( 3823 exp.Ln if self.LOG_DEFAULTS_TO_LN else exp.Log, this=seq_get(args, 0) 3824 ) 3825 3826 def _parse_match_against(self) -> exp.MatchAgainst: 3827 expressions = self._parse_csv(self._parse_column) 3828 3829 self._match_text_seq(")", "AGAINST", "(") 3830 3831 this = self._parse_string() 3832 3833 if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"): 3834 modifier = "IN NATURAL LANGUAGE MODE" 3835 if self._match_text_seq("WITH", "QUERY", "EXPANSION"): 3836 modifier = f"{modifier} WITH QUERY EXPANSION" 3837 elif self._match_text_seq("IN", "BOOLEAN", "MODE"): 3838 modifier = "IN BOOLEAN MODE" 3839 elif self._match_text_seq("WITH", "QUERY", "EXPANSION"): 3840 modifier = "WITH QUERY EXPANSION" 3841 
else: 3842 modifier = None 3843 3844 return self.expression( 3845 exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier 3846 ) 3847 3848 # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16 3849 def _parse_open_json(self) -> exp.OpenJSON: 3850 this = self._parse_bitwise() 3851 path = self._match(TokenType.COMMA) and self._parse_string() 3852 3853 def _parse_open_json_column_def() -> exp.OpenJSONColumnDef: 3854 this = self._parse_field(any_token=True) 3855 kind = self._parse_types() 3856 path = self._parse_string() 3857 as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON) 3858 3859 return self.expression( 3860 exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json 3861 ) 3862 3863 expressions = None 3864 if self._match_pair(TokenType.R_PAREN, TokenType.WITH): 3865 self._match_l_paren() 3866 expressions = self._parse_csv(_parse_open_json_column_def) 3867 3868 return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions) 3869 3870 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 3871 args = self._parse_csv(self._parse_bitwise) 3872 3873 if self._match(TokenType.IN): 3874 return self.expression( 3875 exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0) 3876 ) 3877 3878 if haystack_first: 3879 haystack = seq_get(args, 0) 3880 needle = seq_get(args, 1) 3881 else: 3882 needle = seq_get(args, 0) 3883 haystack = seq_get(args, 1) 3884 3885 return self.expression( 3886 exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2) 3887 ) 3888 3889 def _parse_join_hint(self, func_name: str) -> exp.JoinHint: 3890 args = self._parse_csv(self._parse_table) 3891 return exp.JoinHint(this=func_name.upper(), expressions=args) 3892 3893 def _parse_substring(self) -> exp.Substring: 3894 # Postgres supports the form: substring(string [from int] [for int]) 3895 # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6 3896 3897 args = self._parse_csv(self._parse_bitwise) 3898 3899 if self._match(TokenType.FROM): 3900 args.append(self._parse_bitwise()) 3901 if self._match(TokenType.FOR): 3902 args.append(self._parse_bitwise()) 3903 3904 return self.validate_expression(exp.Substring.from_arg_list(args), args) 3905 3906 def _parse_trim(self) -> exp.Trim: 3907 # https://www.w3resource.com/sql/character-functions/trim.php 3908 # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html 3909 3910 position = None 3911 collation = None 3912 3913 if self._match_texts(self.TRIM_TYPES): 3914 position = self._prev.text.upper() 3915 3916 expression = self._parse_bitwise() 3917 if self._match_set((TokenType.FROM, TokenType.COMMA)): 3918 this = self._parse_bitwise() 3919 else: 3920 this = expression 3921 expression = None 3922 3923 if self._match(TokenType.COLLATE): 3924 collation = self._parse_bitwise() 3925 3926 return self.expression( 3927 exp.Trim, this=this, position=position, expression=expression, collation=collation 3928 ) 3929 3930 def _parse_window_clause(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]: 3931 return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window) 3932 3933 def _parse_named_window(self) -> t.Optional[exp.Expression]: 3934 return self._parse_window(self._parse_id_var(), alias=True) 3935 3936 def _parse_respect_or_ignore_nulls( 3937 self, this: t.Optional[exp.Expression] 3938 ) -> t.Optional[exp.Expression]: 3939 if self._match_text_seq("IGNORE", "NULLS"): 3940 return 
self.expression(exp.IgnoreNulls, this=this) 3941 if self._match_text_seq("RESPECT", "NULLS"): 3942 return self.expression(exp.RespectNulls, this=this) 3943 return this 3944 3945 def _parse_window( 3946 self, this: t.Optional[exp.Expression], alias: bool = False 3947 ) -> t.Optional[exp.Expression]: 3948 if self._match_pair(TokenType.FILTER, TokenType.L_PAREN): 3949 this = self.expression(exp.Filter, this=this, expression=self._parse_where()) 3950 self._match_r_paren() 3951 3952 # T-SQL allows the OVER (...) syntax after WITHIN GROUP. 3953 # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16 3954 if self._match_text_seq("WITHIN", "GROUP"): 3955 order = self._parse_wrapped(self._parse_order) 3956 this = self.expression(exp.WithinGroup, this=this, expression=order) 3957 3958 # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER 3959 # Some dialects choose to implement and some do not. 3960 # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html 3961 3962 # There is some code above in _parse_lambda that handles 3963 # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ... 3964 3965 # The below changes handle 3966 # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ... 3967 3968 # Oracle allows both formats 3969 # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html) 3970 # and Snowflake chose to do the same for familiarity 3971 # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes 3972 this = self._parse_respect_or_ignore_nulls(this) 3973 3974 # bigquery select from window x AS (partition by ...) 3975 if alias: 3976 over = None 3977 self._match(TokenType.ALIAS) 3978 elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS): 3979 return this 3980 else: 3981 over = self._prev.text.upper() 3982 3983 if not self._match(TokenType.L_PAREN): 3984 return self.expression( 3985 exp.Window, this=this, alias=self._parse_id_var(False), over=over 3986 ) 3987 3988 window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS) 3989 3990 first = self._match(TokenType.FIRST) 3991 if self._match_text_seq("LAST"): 3992 first = False 3993 3994 partition = self._parse_partition_by() 3995 order = self._parse_order() 3996 kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text 3997 3998 if kind: 3999 self._match(TokenType.BETWEEN) 4000 start = self._parse_window_spec() 4001 self._match(TokenType.AND) 4002 end = self._parse_window_spec() 4003 4004 spec = self.expression( 4005 exp.WindowSpec, 4006 kind=kind, 4007 start=start["value"], 4008 start_side=start["side"], 4009 end=end["value"], 4010 end_side=end["side"], 4011 ) 4012 else: 4013 spec = None 4014 4015 self._match_r_paren() 4016 4017 return self.expression( 4018 exp.Window, 4019 this=this, 4020 partition_by=partition, 4021 order=order, 4022 spec=spec, 4023 alias=window_alias, 4024 over=over, 4025 first=first, 4026 ) 4027 4028 def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]: 4029 self._match(TokenType.BETWEEN) 4030 4031 return { 4032 "value": ( 4033 (self._match_text_seq("UNBOUNDED") and "UNBOUNDED") 4034 or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW") 4035 or self._parse_bitwise() 4036 ), 4037 "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text, 4038 } 4039 4040 def _parse_alias( 4041 self, this: t.Optional[exp.Expression], explicit: bool = False 4042 ) -> t.Optional[exp.Expression]: 4043 
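# handles `expr AS alias`, a bare trailing alias, and multi-part aliases like `expr AS (a, b)`; when explicit=True, the AS keyword is required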
any_token = self._match(TokenType.ALIAS) 4044 4045 if explicit and not any_token: 4046 return this 4047 4048 if self._match(TokenType.L_PAREN): 4049 aliases = self.expression( 4050 exp.Aliases, 4051 this=this, 4052 expressions=self._parse_csv(lambda: self._parse_id_var(any_token)), 4053 ) 4054 self._match_r_paren(aliases) 4055 return aliases 4056 4057 alias = self._parse_id_var(any_token) 4058 4059 if alias: 4060 return self.expression(exp.Alias, this=this, alias=alias) 4061 4062 return this 4063 4064 def _parse_id_var( 4065 self, 4066 any_token: bool = True, 4067 tokens: t.Optional[t.Collection[TokenType]] = None, 4068 ) -> t.Optional[exp.Expression]: 4069 identifier = self._parse_identifier() 4070 4071 if identifier: 4072 return identifier 4073 4074 if (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS): 4075 quoted = self._prev.token_type == TokenType.STRING 4076 return exp.Identifier(this=self._prev.text, quoted=quoted) 4077 4078 return None 4079 4080 def _parse_string(self) -> t.Optional[exp.Expression]: 4081 if self._match(TokenType.STRING): 4082 return self.PRIMARY_PARSERS[TokenType.STRING](self, self._prev) 4083 return self._parse_placeholder() 4084 4085 def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]: 4086 return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True) 4087 4088 def _parse_number(self) -> t.Optional[exp.Expression]: 4089 if self._match(TokenType.NUMBER): 4090 return self.PRIMARY_PARSERS[TokenType.NUMBER](self, self._prev) 4091 return self._parse_placeholder() 4092 4093 def _parse_identifier(self) -> t.Optional[exp.Expression]: 4094 if self._match(TokenType.IDENTIFIER): 4095 return self.expression(exp.Identifier, this=self._prev.text, quoted=True) 4096 return self._parse_placeholder() 4097 4098 def _parse_var( 4099 self, any_token: bool = False, tokens: t.Optional[t.Collection[TokenType]] = None 4100 ) -> t.Optional[exp.Expression]: 4101 if ( 4102 (any_token and self._advance_any()) 4103 or self._match(TokenType.VAR) 4104 or (self._match_set(tokens) if tokens else False) 4105 ): 4106 return self.expression(exp.Var, this=self._prev.text) 4107 return self._parse_placeholder() 4108 4109 def _advance_any(self) -> t.Optional[Token]: 4110 if self._curr and self._curr.token_type not in self.RESERVED_KEYWORDS: 4111 self._advance() 4112 return self._prev 4113 return None 4114 4115 def _parse_var_or_string(self) -> t.Optional[exp.Expression]: 4116 return self._parse_var() or self._parse_string() 4117 4118 def _parse_null(self) -> t.Optional[exp.Expression]: 4119 if self._match(TokenType.NULL): 4120 return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev) 4121 return None 4122 4123 def _parse_boolean(self) -> t.Optional[exp.Expression]: 4124 if self._match(TokenType.TRUE): 4125 return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev) 4126 if self._match(TokenType.FALSE): 4127 return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev) 4128 return None 4129 4130 def _parse_star(self) -> t.Optional[exp.Expression]: 4131 if self._match(TokenType.STAR): 4132 return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev) 4133 return None 4134 4135 def _parse_parameter(self) -> exp.Parameter: 4136 wrapped = self._match(TokenType.L_BRACE) 4137 this = self._parse_var() or self._parse_identifier() or self._parse_primary() 4138 self._match(TokenType.R_BRACE) 4139 return self.expression(exp.Parameter, this=this, wrapped=wrapped) 4140 4141 def _parse_placeholder(self) -> t.Optional[exp.Expression]: 
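# dialect-specific placeholders, e.g. ? or :name; if the matched token's parser doesn't produce a placeholder, back up and let other rules try it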
4142 if self._match_set(self.PLACEHOLDER_PARSERS): 4143 placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self) 4144 if placeholder: 4145 return placeholder 4146 self._advance(-1) 4147 return None 4148 4149 def _parse_except(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]: 4150 if not self._match(TokenType.EXCEPT): 4151 return None 4152 if self._match(TokenType.L_PAREN, advance=False): 4153 return self._parse_wrapped_csv(self._parse_column) 4154 return self._parse_csv(self._parse_column) 4155 4156 def _parse_replace(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]: 4157 if not self._match(TokenType.REPLACE): 4158 return None 4159 if self._match(TokenType.L_PAREN, advance=False): 4160 return self._parse_wrapped_csv(self._parse_expression) 4161 return self._parse_csv(self._parse_expression) 4162 4163 def _parse_csv( 4164 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA 4165 ) -> t.List[t.Optional[exp.Expression]]: 4166 parse_result = parse_method() 4167 items = [parse_result] if parse_result is not None else [] 4168 4169 while self._match(sep): 4170 self._add_comments(parse_result) 4171 parse_result = parse_method() 4172 if parse_result is not None: 4173 items.append(parse_result) 4174 4175 return items 4176 4177 def _parse_tokens( 4178 self, parse_method: t.Callable, expressions: t.Dict 4179 ) -> t.Optional[exp.Expression]: 4180 this = parse_method() 4181 4182 while self._match_set(expressions): 4183 this = self.expression( 4184 expressions[self._prev.token_type], 4185 this=this, 4186 comments=self._prev_comments, 4187 expression=parse_method(), 4188 ) 4189 4190 return this 4191 4192 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[t.Optional[exp.Expression]]: 4193 return self._parse_wrapped_csv(self._parse_id_var, optional=optional) 4194 4195 def _parse_wrapped_csv( 4196 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False 4197 ) -> t.List[t.Optional[exp.Expression]]: 4198 return self._parse_wrapped( 4199 lambda: self._parse_csv(parse_method, sep=sep), optional=optional 4200 ) 4201 4202 def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any: 4203 wrapped = self._match(TokenType.L_PAREN) 4204 if not wrapped and not optional: 4205 self.raise_error("Expecting (") 4206 parse_result = parse_method() 4207 if wrapped: 4208 self._match_r_paren() 4209 return parse_result 4210 4211 def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]: 4212 return self._parse_select() or self._parse_set_operations( 4213 self._parse_expression() if alias else self._parse_conjunction() 4214 ) 4215 4216 def _parse_ddl_select(self) -> t.Optional[exp.Expression]: 4217 return self._parse_query_modifiers( 4218 self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False)) 4219 ) 4220 4221 def _parse_transaction(self) -> exp.Transaction: 4222 this = None 4223 if self._match_texts(self.TRANSACTION_KIND): 4224 this = self._prev.text 4225 4226 self._match_texts({"TRANSACTION", "WORK"}) 4227 4228 modes = [] 4229 while True: 4230 mode = [] 4231 while self._match(TokenType.VAR): 4232 mode.append(self._prev.text) 4233 4234 if mode: 4235 modes.append(" ".join(mode)) 4236 if not self._match(TokenType.COMMA): 4237 break 4238 4239 return self.expression(exp.Transaction, this=this, modes=modes) 4240 4241 def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback: 4242 chain = None 4243 savepoint = None 4244 is_rollback = self._prev.token_type == 
    def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback:
        chain = None
        savepoint = None
        is_rollback = self._prev.token_type == TokenType.ROLLBACK

        self._match_texts({"TRANSACTION", "WORK"})

        if self._match_text_seq("TO"):
            self._match_text_seq("SAVEPOINT")
            savepoint = self._parse_id_var()

        if self._match(TokenType.AND):
            chain = not self._match_text_seq("NO")
            self._match_text_seq("CHAIN")

        if is_rollback:
            return self.expression(exp.Rollback, savepoint=savepoint)

        return self.expression(exp.Commit, chain=chain)

    def _parse_add_column(self) -> t.Optional[exp.Expression]:
        if not self._match_text_seq("ADD"):
            return None

        self._match(TokenType.COLUMN)
        exists_column = self._parse_exists(not_=True)
        expression = self._parse_column_def(self._parse_field(any_token=True))

        if expression:
            expression.set("exists", exists_column)

            # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns
            if self._match_texts(("FIRST", "AFTER")):
                position = self._prev.text
                column_position = self.expression(
                    exp.ColumnPosition, this=self._parse_column(), position=position
                )
                expression.set("position", column_position)

        return expression

    def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]:
        drop = self._match(TokenType.DROP) and self._parse_drop()
        if drop and not isinstance(drop, exp.Command):
            drop.set("kind", drop.args.get("kind", "COLUMN"))
        return drop

    # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html
    def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition:
        return self.expression(
            exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists
        )

    def _parse_add_constraint(self) -> exp.AddConstraint:
        this = None
        kind = self._prev.token_type

        if kind == TokenType.CONSTRAINT:
            this = self._parse_id_var()

            if self._match_text_seq("CHECK"):
                expression = self._parse_wrapped(self._parse_conjunction)
                enforced = self._match_text_seq("ENFORCED")

                return self.expression(
                    exp.AddConstraint, this=this, expression=expression, enforced=enforced
                )

        if kind == TokenType.FOREIGN_KEY or self._match(TokenType.FOREIGN_KEY):
            expression = self._parse_foreign_key()
        elif kind == TokenType.PRIMARY_KEY or self._match(TokenType.PRIMARY_KEY):
            expression = self._parse_primary_key()
        else:
            expression = None

        return self.expression(exp.AddConstraint, this=this, expression=expression)

    def _parse_alter_table_add(self) -> t.List[t.Optional[exp.Expression]]:
        index = self._index - 1

        if self._match_set(self.ADD_CONSTRAINT_TOKENS):
            return self._parse_csv(self._parse_add_constraint)

        self._retreat(index)
        return self._parse_csv(self._parse_add_column)
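    # NOTE (illustrative): ALTER TABLE ... ADD is dispatched through
    # _parse_alter_table_add, which speculatively tries the constraint form and
    # retreats to the column form. Assuming the default dialect, a sketch:
    #
    #     >>> from sqlglot import exp, parse_one
    #     >>> alter = parse_one("ALTER TABLE t ADD COLUMN c INT")
    #     >>> isinstance(alter, exp.AlterTable)
    #     True
    #     >>> len(alter.args["actions"])
    #     1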
    def _parse_alter_table_alter(self) -> exp.AlterColumn:
        self._match(TokenType.COLUMN)
        column = self._parse_field(any_token=True)

        if self._match_pair(TokenType.DROP, TokenType.DEFAULT):
            return self.expression(exp.AlterColumn, this=column, drop=True)
        if self._match_pair(TokenType.SET, TokenType.DEFAULT):
            return self.expression(exp.AlterColumn, this=column, default=self._parse_conjunction())

        self._match_text_seq("SET", "DATA")
        return self.expression(
            exp.AlterColumn,
            this=column,
            dtype=self._match_text_seq("TYPE") and self._parse_types(),
            collate=self._match(TokenType.COLLATE) and self._parse_term(),
            using=self._match(TokenType.USING) and self._parse_conjunction(),
        )

    def _parse_alter_table_drop(self) -> t.List[t.Optional[exp.Expression]]:
        index = self._index - 1

        partition_exists = self._parse_exists()
        if self._match(TokenType.PARTITION, advance=False):
            return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists))

        self._retreat(index)
        return self._parse_csv(self._parse_drop_column)

    def _parse_alter_table_rename(self) -> exp.RenameTable:
        self._match_text_seq("TO")
        return self.expression(exp.RenameTable, this=self._parse_table(schema=True))

    def _parse_alter(self) -> exp.AlterTable | exp.Command:
        start = self._prev

        if not self._match(TokenType.TABLE):
            return self._parse_as_command(start)

        exists = self._parse_exists()
        this = self._parse_table(schema=True)

        if self._next:
            self._advance()
        parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None

        if parser:
            actions = ensure_list(parser(self))

            if not self._curr:
                return self.expression(
                    exp.AlterTable,
                    this=this,
                    exists=exists,
                    actions=actions,
                )
        return self._parse_as_command(start)

    def _parse_merge(self) -> exp.Merge:
        self._match(TokenType.INTO)
        target = self._parse_table()

        self._match(TokenType.USING)
        using = self._parse_table()

        self._match(TokenType.ON)
        on = self._parse_conjunction()

        whens = []
        while self._match(TokenType.WHEN):
            matched = not self._match(TokenType.NOT)
            self._match_text_seq("MATCHED")
            source = (
                False
                if self._match_text_seq("BY", "TARGET")
                else self._match_text_seq("BY", "SOURCE")
            )
            condition = self._parse_conjunction() if self._match(TokenType.AND) else None

            self._match(TokenType.THEN)

            if self._match(TokenType.INSERT):
                _this = self._parse_star()
                if _this:
                    then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=_this)
                else:
                    then = self.expression(
                        exp.Insert,
                        this=self._parse_value(),
                        expression=self._match(TokenType.VALUES) and self._parse_value(),
                    )
            elif self._match(TokenType.UPDATE):
                expressions = self._parse_star()
                if expressions:
                    then = self.expression(exp.Update, expressions=expressions)
                else:
                    then = self.expression(
                        exp.Update,
                        expressions=self._match(TokenType.SET)
                        and self._parse_csv(self._parse_equality),
                    )
            elif self._match(TokenType.DELETE):
                then = self.expression(exp.Var, this=self._prev.text)
            else:
                then = None

            whens.append(
                self.expression(
                    exp.When,
                    matched=matched,
                    source=source,
                    condition=condition,
                    then=then,
                )
            )

        return self.expression(
            exp.Merge,
            this=target,
            using=using,
            on=on,
            expressions=whens,
        )

    def _parse_show(self) -> t.Optional[exp.Expression]:
        parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE)
        if parser:
            return parser(self)
        self._advance()
        return self.expression(exp.Show, this=self._prev.text.upper())
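    # NOTE (illustrative): each WHEN branch of a MERGE becomes one exp.When node
    # inside the "expressions" arg of exp.Merge. Assuming the default dialect:
    #
    #     >>> from sqlglot import exp, parse_one
    #     >>> merge = parse_one(
    #     ...     "MERGE INTO t USING s ON t.id = s.id "
    #     ...     "WHEN MATCHED THEN UPDATE SET t.v = s.v "
    #     ...     "WHEN NOT MATCHED THEN INSERT (id, v) VALUES (s.id, s.v)"
    #     ... )
    #     >>> isinstance(merge, exp.Merge), len(merge.expressions)
    #     (True, 2)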
    def _parse_set_item_assignment(
        self, kind: t.Optional[str] = None
    ) -> t.Optional[exp.Expression]:
        index = self._index

        if kind in {"GLOBAL", "SESSION"} and self._match_text_seq("TRANSACTION"):
            return self._parse_set_transaction(global_=kind == "GLOBAL")

        left = self._parse_primary() or self._parse_id_var()

        if not self._match_texts(("=", "TO")):
            self._retreat(index)
            return None

        right = self._parse_statement() or self._parse_id_var()
        this = self.expression(exp.EQ, this=left, expression=right)

        return self.expression(exp.SetItem, this=this, kind=kind)

    def _parse_set_transaction(self, global_: bool = False) -> exp.Expression:
        self._match_text_seq("TRANSACTION")
        characteristics = self._parse_csv(
            lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS)
        )
        return self.expression(
            exp.SetItem,
            expressions=characteristics,
            kind="TRANSACTION",
            **{"global": global_},  # type: ignore
        )

    def _parse_set_item(self) -> t.Optional[exp.Expression]:
        parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE)
        return parser(self) if parser else self._parse_set_item_assignment(kind=None)

    def _parse_set(self) -> exp.Set | exp.Command:
        index = self._index
        set_ = self.expression(exp.Set, expressions=self._parse_csv(self._parse_set_item))

        if self._curr:
            self._retreat(index)
            return self._parse_as_command(self._prev)

        return set_

    def _parse_var_from_options(self, options: t.Collection[str]) -> t.Optional[exp.Var]:
        for option in options:
            if self._match_text_seq(*option.split(" ")):
                return exp.var(option)
        return None

    def _parse_as_command(self, start: Token) -> exp.Command:
        while self._curr:
            self._advance()
        text = self._find_sql(start, self._prev)
        size = len(start.text)
        return exp.Command(this=text[:size], expression=text[size:])

    def _parse_dict_property(self, this: str) -> exp.DictProperty:
        settings = []

        self._match_l_paren()
        kind = self._parse_id_var()

        if self._match(TokenType.L_PAREN):
            while True:
                key = self._parse_id_var()
                value = self._parse_primary()

                if not key and value is None:
                    break
                settings.append(self.expression(exp.DictSubProperty, this=key, value=value))
            self._match(TokenType.R_PAREN)

        self._match_r_paren()

        return self.expression(
            exp.DictProperty,
            this=this,
            kind=kind.this if kind else None,
            settings=settings,
        )

    def _parse_dict_range(self, this: str) -> exp.DictRange:
        self._match_l_paren()
        has_min = self._match_text_seq("MIN")
        if has_min:
            min = self._parse_var() or self._parse_primary()
            self._match_text_seq("MAX")
            max = self._parse_var() or self._parse_primary()
        else:
            max = self._parse_var() or self._parse_primary()
            min = exp.Literal.number(0)
        self._match_r_paren()
        return self.expression(exp.DictRange, this=this, min=min, max=max)

    def _find_parser(
        self, parsers: t.Dict[str, t.Callable], trie: t.Dict
    ) -> t.Optional[t.Callable]:
        if not self._curr:
            return None

        index = self._index
        this = []
        while True:
            # The current token might be multiple words
            curr = self._curr.text.upper()
            key = curr.split(" ")
            this.append(curr)
            self._advance()
            result, trie = in_trie(trie, key)
            if result == 0:
                break
            if result == 2:
                subparser = parsers[" ".join(this)]
                return subparser
        self._retreat(index)
        return None
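    # NOTE (illustrative): _find_parser drives multi-word statements such as SHOW
    # and SET through the tries built by the _Parser metaclass. The in_trie
    # contract it relies on, sketched with a hypothetical key set:
    #
    #     >>> from sqlglot.trie import in_trie, new_trie
    #     >>> trie = new_trie(k.split(" ") for k in ("SHOW TABLES", "SHOW COLUMNS"))
    #     >>> in_trie(trie, ["SHOW"])[0]            # 1 == prefix: keep consuming tokens
    #     1
    #     >>> in_trie(trie, ["SHOW", "TABLES"])[0]  # 2 == full key: sub-parser found
    #     2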
    def _match(self, token_type, advance=True, expression=None):
        if not self._curr:
            return None

        if self._curr.token_type == token_type:
            if advance:
                self._advance()
            self._add_comments(expression)
            return True

        return None

    def _match_set(self, types, advance=True):
        if not self._curr:
            return None

        if self._curr.token_type in types:
            if advance:
                self._advance()
            return True

        return None

    def _match_pair(self, token_type_a, token_type_b, advance=True):
        if not self._curr or not self._next:
            return None

        if self._curr.token_type == token_type_a and self._next.token_type == token_type_b:
            if advance:
                self._advance(2)
            return True

        return None

    def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None:
        if not self._match(TokenType.L_PAREN, expression=expression):
            self.raise_error("Expecting (")

    def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None:
        if not self._match(TokenType.R_PAREN, expression=expression):
            self.raise_error("Expecting )")

    def _match_texts(self, texts, advance=True):
        if self._curr and self._curr.text.upper() in texts:
            if advance:
                self._advance()
            return True
        return False

    def _match_text_seq(self, *texts, advance=True):
        index = self._index
        for text in texts:
            if self._curr and self._curr.text.upper() == text:
                self._advance()
            else:
                self._retreat(index)
                return False

        if not advance:
            self._retreat(index)

        return True

    @t.overload
    def _replace_columns_with_dots(self, this: exp.Expression) -> exp.Expression:
        ...

    @t.overload
    def _replace_columns_with_dots(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        ...

    def _replace_columns_with_dots(self, this):
        if isinstance(this, exp.Dot):
            exp.replace_children(this, self._replace_columns_with_dots)
        elif isinstance(this, exp.Column):
            exp.replace_children(this, self._replace_columns_with_dots)
            table = this.args.get("table")
            this = (
                self.expression(exp.Dot, this=table, expression=this.this)
                if table
                else self.expression(exp.Var, this=this.name)
            )
        elif isinstance(this, exp.Identifier):
            this = self.expression(exp.Var, this=this.name)

        return this

    def _replace_lambda(
        self, node: t.Optional[exp.Expression], lambda_variables: t.Set[str]
    ) -> t.Optional[exp.Expression]:
        if not node:
            return node

        for column in node.find_all(exp.Column):
            if column.parts[0].name in lambda_variables:
                dot_or_id = column.to_dot() if column.table else column.this
                parent = column.parent

                while isinstance(parent, exp.Dot):
                    if not isinstance(parent.parent, exp.Dot):
                        parent.replace(dot_or_id)
                        break
                    parent = parent.parent
                else:
                    if column is node:
                        node = dot_or_id
                    else:
                        column.replace(dot_or_id)
        return node
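The `_match_*` helpers above share one contract: either the whole pattern is consumed, or the token cursor is restored via `_retreat` so callers can speculate freely. A self-contained sketch of that backtracking pattern (a toy model for illustration, not the class above):

    class MiniCursor:
        """Toy cursor mirroring the all-or-nothing contract of _match_text_seq."""

        def __init__(self, words):
            self.words = words
            self.index = 0

        def match_seq(self, *texts):
            start = self.index
            for text in texts:
                if self.index < len(self.words) and self.words[self.index].upper() == text:
                    self.index += 1
                else:
                    self.index = start  # retreat: never leave a partial match behind
                    return False
            return True

    cur = MiniCursor(["ROLLBACK", "TO", "SAVEPOINT", "sp1"])
    assert cur.match_seq("TO") is False     # no match, cursor unchanged
    assert cur.match_seq("ROLLBACK", "TO")  # consumes both tokens
    assert cur.index == 2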
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: Determines the amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
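For example, to accumulate several syntax errors and raise them as a single ParseError (a usage sketch based only on the arguments documented above):

    from sqlglot.errors import ErrorLevel
    from sqlglot.parser import Parser

    # Collect up to 5 error messages before raising one combined ParseError.
    parser = Parser(error_level=ErrorLevel.RAISE, max_errors=5)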
    def __init__(
        self,
        error_level: t.Optional[ErrorLevel] = None,
        error_message_context: int = 100,
        max_errors: int = 3,
    ):
        self.error_level = error_level or ErrorLevel.IMMEDIATE
        self.error_message_context = error_message_context
        self.max_errors = max_errors
        self.reset()
    def parse(
        self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens and returns a list of syntax trees, one tree
        per parsed SQL statement.

        Args:
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The list of the produced syntax trees.
        """
        return self._parse(
            parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
        )
Parses a list of tokens and returns a list of syntax trees, one tree per parsed SQL statement.
Arguments:
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The list of the produced syntax trees.
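A minimal end-to-end sketch, assuming the standard Tokenizer is used to produce the token list:

    from sqlglot.parser import Parser
    from sqlglot.tokens import Tokenizer

    sql = "SELECT 1; SELECT 2"
    tokens = Tokenizer().tokenize(sql)
    trees = Parser().parse(tokens, sql=sql)
    assert len(trees) == 2  # one syntax tree per statement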
    def parse_into(
        self,
        expression_types: exp.IntoType,
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens into a given Expression type. If a collection of Expression
        types is given instead, this method will try to parse the token list into each one
        of them, stopping at the first for which the parsing succeeds.

        Args:
            expression_types: The expression type(s) to try and parse the token list into.
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The target Expression.
        """
        errors = []
        for expression_type in ensure_list(expression_types):
            parser = self.EXPRESSION_PARSERS.get(expression_type)
            if not parser:
                raise TypeError(f"No parser registered for {expression_type}")

            try:
                return self._parse(parser, raw_tokens, sql)
            except ParseError as e:
                e.errors[0]["into_expression"] = expression_type
                errors.append(e)

        raise ParseError(
            f"Failed to parse '{sql or raw_tokens}' into {expression_types}",
            errors=merge_errors(errors),
        ) from errors[-1]
Parses a list of tokens into a given Expression type. If a collection of Expression types is given instead, this method will try to parse the token list into each one of them, stopping at the first for which the parsing succeeds.
Arguments:
- expression_types: The expression type(s) to try and parse the token list into.
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The target Expression.
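A usage sketch; it assumes exp.Select is registered in EXPRESSION_PARSERS, as it is for the base parser:

    from sqlglot import exp
    from sqlglot.parser import Parser
    from sqlglot.tokens import Tokenizer

    sql = "SELECT a FROM t"
    trees = Parser().parse_into(exp.Select, Tokenizer().tokenize(sql), sql=sql)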
    def check_errors(self) -> None:
        """Logs or raises any found errors, depending on the chosen error level setting."""
        if self.error_level == ErrorLevel.WARN:
            for error in self.errors:
                logger.error(str(error))
        elif self.error_level == ErrorLevel.RAISE and self.errors:
            raise ParseError(
                concat_messages(self.errors, self.max_errors),
                errors=merge_errors(self.errors),
            )
Logs or raises any found errors, depending on the chosen error level setting.
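With ErrorLevel.WARN, parsing an invalid statement logs each recorded error instead of raising, and the errors remain inspectable afterwards. A sketch, assuming "SELECT 1 +" fails validation for the missing right-hand side:

    from sqlglot.errors import ErrorLevel
    from sqlglot.parser import Parser
    from sqlglot.tokens import Tokenizer

    sql = "SELECT 1 +"  # incomplete expression
    parser = Parser(error_level=ErrorLevel.WARN)
    parser.parse(Tokenizer().tokenize(sql), sql=sql)  # errors are logged, not raised
    assert parser.errors  # the recorded ParseError objects are still available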
    def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
        """
        Appends an error to the list of recorded errors or raises it, depending on the chosen
        error level setting.
        """
        token = token or self._curr or self._prev or Token.string("")
        start = token.start
        end = token.end + 1
        start_context = self.sql[max(start - self.error_message_context, 0) : start]
        highlight = self.sql[start:end]
        end_context = self.sql[end : end + self.error_message_context]

        error = ParseError.new(
            f"{message}. Line {token.line}, Col: {token.col}.\n"
            f"  {start_context}\033[4m{highlight}\033[0m{end_context}",
            description=message,
            line=token.line,
            col=token.col,
            start_context=start_context,
            highlight=highlight,
            end_context=end_context,
        )

        if self.error_level == ErrorLevel.IMMEDIATE:
            raise error

        self.errors.append(error)
Appends an error to the list of recorded errors or raises it, depending on the chosen error level setting.
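Dialect subclasses typically call it from their own parse helpers; a hypothetical sketch (the method _parse_my_clause and its keywords are invented for illustration):

    from sqlglot import exp
    from sqlglot.parser import Parser

    class MyParser(Parser):
        def _parse_my_clause(self):
            # Raises immediately or records the error, depending on self.error_level.
            if not self._match_text_seq("MY", "CLAUSE"):
                self.raise_error("Expecting MY CLAUSE")
            return self.expression(exp.Var, this="MY CLAUSE")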
    def expression(
        self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs
    ) -> E:
        """
        Creates a new, validated Expression.

        Args:
            exp_class: The expression class to instantiate.
            comments: An optional list of comments to attach to the expression.
            kwargs: The arguments to set for the expression along with their respective values.

        Returns:
            The target expression.
        """
        instance = exp_class(**kwargs)
        instance.add_comments(comments) if comments else self._add_comments(instance)
        return self.validate_expression(instance)
Creates a new, validated Expression.
Arguments:
- exp_class: The expression class to instantiate.
- comments: An optional list of comments to attach to the expression.
- kwargs: The arguments to set for the expression along with their respective values.
Returns:
The target expression.
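A hypothetical helper in a Parser subclass, showing the typical call shape (exp.Is and exp.Null are real expression classes; the helper itself is invented):

    from sqlglot import exp
    from sqlglot.parser import Parser

    class MyParser(Parser):
        def _parse_is_null_of(self, column: exp.Expression) -> exp.Is:
            # Builds an IS NULL check over an already-parsed column; validation
            # runs via validate_expression before the node is returned.
            return self.expression(exp.Is, this=column, expression=exp.Null())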
    def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E:
        """
        Validates an Expression, making sure that all its mandatory arguments are set.

        Args:
            expression: The expression to validate.
            args: An optional list of items that was used to instantiate the expression, if it's a Func.

        Returns:
            The validated expression.
        """
        if self.error_level != ErrorLevel.IGNORE:
            for error_message in expression.error_messages(args):
                self.raise_error(error_message)

        return expression
Validates an Expression, making sure that all its mandatory arguments are set.
Arguments:
- expression: The expression to validate.
- args: An optional list of items that was used to instantiate the expression, if it's a Func.
Returns:
The validated expression.
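A sketch of the validation path, assuming exp.Add requires both of its arguments (it does in the expressions module):

    from sqlglot import exp
    from sqlglot.errors import ErrorLevel
    from sqlglot.parser import Parser

    parser = Parser(error_level=ErrorLevel.RAISE)
    incomplete = exp.Add(this=exp.Literal.number(1))  # "expression" is missing
    parser.validate_expression(incomplete)  # records the missing-argument error
    parser.check_errors()  # raises a ParseError summarizing the recorded errors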