sqlglot.parser

from __future__ import annotations

import logging
import typing as t
from collections import defaultdict

from sqlglot import exp
from sqlglot.errors import ErrorLevel, ParseError, concat_messages, merge_errors
from sqlglot.helper import apply_index_offset, ensure_list, seq_get
from sqlglot.time import format_time
from sqlglot.tokens import Token, Tokenizer, TokenType
from sqlglot.trie import TrieResult, in_trie, new_trie

if t.TYPE_CHECKING:
    from sqlglot._typing import E

logger = logging.getLogger("sqlglot")


def parse_var_map(args: t.List) -> exp.StarMap | exp.VarMap:
    if len(args) == 1 and args[0].is_star:
        return exp.StarMap(this=args[0])

    keys = []
    values = []
    for i in range(0, len(args), 2):
        keys.append(args[i])
        values.append(args[i + 1])

    return exp.VarMap(
        keys=exp.Array(expressions=keys),
        values=exp.Array(expressions=values),
    )


def parse_like(args: t.List) -> exp.Escape | exp.Like:
    like = exp.Like(this=seq_get(args, 1), expression=seq_get(args, 0))
    return exp.Escape(this=like, expression=seq_get(args, 2)) if len(args) > 2 else like


def binary_range_parser(
    expr_type: t.Type[exp.Expression],
) -> t.Callable[[Parser, t.Optional[exp.Expression]], t.Optional[exp.Expression]]:
    return lambda self, this: self._parse_escape(
        self.expression(expr_type, this=this, expression=self._parse_bitwise())
    )
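
# Illustrative usage of the helpers above (added for clarity; not part of the
# original module). parse_var_map alternates its argument list into keys and
# values, so [k1, v1, k2, v2] becomes VarMap(keys=[k1, k2], values=[v1, v2]):
#
#     from sqlglot import exp, parse_one
#     node = parse_one("SELECT VAR_MAP('a', 1, 'b', 2)")
#     isinstance(node.find(exp.VarMap), exp.VarMap)  # True
#
# parse_like builds exp.Like with `this` taken from the second argument and
# `expression` from the first, wrapping the result in exp.Escape when a third
# (escape character) argument is present.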

class _Parser(type):
    def __new__(cls, clsname, bases, attrs):
        klass = super().__new__(cls, clsname, bases, attrs)

        klass.SHOW_TRIE = new_trie(key.split(" ") for key in klass.SHOW_PARSERS)
        klass.SET_TRIE = new_trie(key.split(" ") for key in klass.SET_PARSERS)

        return klass


class Parser(metaclass=_Parser):
    """
    Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.

    Args:
        error_level: The desired error level.
            Default: ErrorLevel.IMMEDIATE
        error_message_context: Determines the amount of context to capture from a
            query string when displaying the error message (in number of characters).
            Default: 100
        max_errors: Maximum number of error messages to include in a raised ParseError.
            This is only relevant if error_level is ErrorLevel.RAISE.
            Default: 3
    """

    FUNCTIONS: t.Dict[str, t.Callable] = {
        **{name: f.from_arg_list for f in exp.ALL_FUNCTIONS for name in f.sql_names()},
        "DATE_TO_DATE_STR": lambda args: exp.Cast(
            this=seq_get(args, 0),
            to=exp.DataType(this=exp.DataType.Type.TEXT),
        ),
        "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)),
        "LIKE": parse_like,
        "TIME_TO_TIME_STR": lambda args: exp.Cast(
            this=seq_get(args, 0),
            to=exp.DataType(this=exp.DataType.Type.TEXT),
        ),
        "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring(
            this=exp.Cast(
                this=seq_get(args, 0),
                to=exp.DataType(this=exp.DataType.Type.TEXT),
            ),
            start=exp.Literal.number(1),
            length=exp.Literal.number(10),
        ),
        "VAR_MAP": parse_var_map,
    }

    NO_PAREN_FUNCTIONS = {
        TokenType.CURRENT_DATE: exp.CurrentDate,
        TokenType.CURRENT_DATETIME: exp.CurrentDate,
        TokenType.CURRENT_TIME: exp.CurrentTime,
        TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp,
        TokenType.CURRENT_USER: exp.CurrentUser,
    }

    STRUCT_TYPE_TOKENS = {
        TokenType.NESTED,
        TokenType.STRUCT,
    }

    NESTED_TYPE_TOKENS = {
        TokenType.ARRAY,
        TokenType.LOWCARDINALITY,
        TokenType.MAP,
        TokenType.NULLABLE,
        *STRUCT_TYPE_TOKENS,
    }

    ENUM_TYPE_TOKENS = {
        TokenType.ENUM,
        TokenType.ENUM8,
        TokenType.ENUM16,
    }

    TYPE_TOKENS = {
        TokenType.BIT,
        TokenType.BOOLEAN,
        TokenType.TINYINT,
        TokenType.UTINYINT,
        TokenType.SMALLINT,
        TokenType.USMALLINT,
        TokenType.INT,
        TokenType.UINT,
        TokenType.BIGINT,
        TokenType.UBIGINT,
        TokenType.INT128,
        TokenType.UINT128,
        TokenType.INT256,
        TokenType.UINT256,
        TokenType.FIXEDSTRING,
        TokenType.FLOAT,
        TokenType.DOUBLE,
        TokenType.CHAR,
        TokenType.NCHAR,
        TokenType.VARCHAR,
        TokenType.NVARCHAR,
        TokenType.TEXT,
        TokenType.MEDIUMTEXT,
        TokenType.LONGTEXT,
        TokenType.MEDIUMBLOB,
        TokenType.LONGBLOB,
        TokenType.BINARY,
        TokenType.VARBINARY,
        TokenType.JSON,
        TokenType.JSONB,
        TokenType.INTERVAL,
        TokenType.TIME,
        TokenType.TIMETZ,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
        TokenType.DATETIME,
        TokenType.DATETIME64,
        TokenType.DATE,
        TokenType.INT4RANGE,
        TokenType.INT4MULTIRANGE,
        TokenType.INT8RANGE,
        TokenType.INT8MULTIRANGE,
        TokenType.NUMRANGE,
        TokenType.NUMMULTIRANGE,
        TokenType.TSRANGE,
        TokenType.TSMULTIRANGE,
        TokenType.TSTZRANGE,
        TokenType.TSTZMULTIRANGE,
        TokenType.DATERANGE,
        TokenType.DATEMULTIRANGE,
        TokenType.DECIMAL,
        TokenType.BIGDECIMAL,
        TokenType.UUID,
        TokenType.GEOGRAPHY,
        TokenType.GEOMETRY,
        TokenType.HLLSKETCH,
        TokenType.HSTORE,
        TokenType.PSEUDO_TYPE,
        TokenType.SUPER,
        TokenType.SERIAL,
        TokenType.SMALLSERIAL,
        TokenType.BIGSERIAL,
        TokenType.XML,
        TokenType.UNIQUEIDENTIFIER,
        TokenType.USERDEFINED,
        TokenType.MONEY,
        TokenType.SMALLMONEY,
        TokenType.ROWVERSION,
        TokenType.IMAGE,
        TokenType.VARIANT,
        TokenType.OBJECT,
        TokenType.INET,
        TokenType.IPADDRESS,
        TokenType.IPPREFIX,
        *ENUM_TYPE_TOKENS,
        *NESTED_TYPE_TOKENS,
    }

    SUBQUERY_PREDICATES = {
        TokenType.ANY: exp.Any,
        TokenType.ALL: exp.All,
        TokenType.EXISTS: exp.Exists,
        TokenType.SOME: exp.Any,
    }

    RESERVED_KEYWORDS = {
        *Tokenizer.SINGLE_TOKENS.values(),
        TokenType.SELECT,
    }
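
    # Illustrative note (not part of the original module): FUNCTIONS maps an
    # upper-case SQL function name to a callable that builds the matching
    # expression from a parsed argument list, e.g.
    #
    #     Parser.FUNCTIONS["GLOB"]([exp.Literal.string("*a*"), exp.column("x")])
    #
    # returns exp.Glob(this=x, expression='*a*'), i.e. x GLOB '*a*': the builder
    # swaps the arguments because GLOB(pattern, value) puts the pattern first.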

    DB_CREATABLES = {
        TokenType.DATABASE,
        TokenType.SCHEMA,
        TokenType.TABLE,
        TokenType.VIEW,
        TokenType.DICTIONARY,
    }

    CREATABLES = {
        TokenType.COLUMN,
        TokenType.FUNCTION,
        TokenType.INDEX,
        TokenType.PROCEDURE,
        *DB_CREATABLES,
    }

    # Tokens that can represent identifiers
    ID_VAR_TOKENS = {
        TokenType.VAR,
        TokenType.ANTI,
        TokenType.APPLY,
        TokenType.ASC,
        TokenType.AUTO_INCREMENT,
        TokenType.BEGIN,
        TokenType.CACHE,
        TokenType.CASE,
        TokenType.COLLATE,
        TokenType.COMMAND,
        TokenType.COMMENT,
        TokenType.COMMIT,
        TokenType.CONSTRAINT,
        TokenType.DEFAULT,
        TokenType.DELETE,
        TokenType.DESC,
        TokenType.DESCRIBE,
        TokenType.DICTIONARY,
        TokenType.DIV,
        TokenType.END,
        TokenType.EXECUTE,
        TokenType.ESCAPE,
        TokenType.FALSE,
        TokenType.FIRST,
        TokenType.FILTER,
        TokenType.FORMAT,
        TokenType.FULL,
        TokenType.IS,
        TokenType.ISNULL,
        TokenType.INTERVAL,
        TokenType.KEEP,
        TokenType.LEFT,
        TokenType.LOAD,
        TokenType.MERGE,
        TokenType.NATURAL,
        TokenType.NEXT,
        TokenType.OFFSET,
        TokenType.ORDINALITY,
        TokenType.OVERWRITE,
        TokenType.PARTITION,
        TokenType.PERCENT,
        TokenType.PIVOT,
        TokenType.PRAGMA,
        TokenType.RANGE,
        TokenType.REFERENCES,
        TokenType.RIGHT,
        TokenType.ROW,
        TokenType.ROWS,
        TokenType.SEMI,
        TokenType.SET,
        TokenType.SETTINGS,
        TokenType.SHOW,
        TokenType.TEMPORARY,
        TokenType.TOP,
        TokenType.TRUE,
        TokenType.UNIQUE,
        TokenType.UNPIVOT,
        TokenType.UPDATE,
        TokenType.VOLATILE,
        TokenType.WINDOW,
        *CREATABLES,
        *SUBQUERY_PREDICATES,
        *TYPE_TOKENS,
        *NO_PAREN_FUNCTIONS,
    }

    INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END}

    TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - {
        TokenType.APPLY,
        TokenType.ASOF,
        TokenType.FULL,
        TokenType.LEFT,
        TokenType.LOCK,
        TokenType.NATURAL,
        TokenType.OFFSET,
        TokenType.RIGHT,
        TokenType.WINDOW,
    }

    COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS}

    UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET}

    TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"}

    FUNC_TOKENS = {
        TokenType.COMMAND,
        TokenType.CURRENT_DATE,
        TokenType.CURRENT_DATETIME,
        TokenType.CURRENT_TIMESTAMP,
        TokenType.CURRENT_TIME,
        TokenType.CURRENT_USER,
        TokenType.FILTER,
        TokenType.FIRST,
        TokenType.FORMAT,
        TokenType.GLOB,
        TokenType.IDENTIFIER,
        TokenType.INDEX,
        TokenType.ISNULL,
        TokenType.ILIKE,
        TokenType.LIKE,
        TokenType.MERGE,
        TokenType.OFFSET,
        TokenType.PRIMARY_KEY,
        TokenType.RANGE,
        TokenType.REPLACE,
        TokenType.RLIKE,
        TokenType.ROW,
        TokenType.UNNEST,
        TokenType.VAR,
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.DATE,
        TokenType.DATETIME,
        TokenType.TABLE,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.WINDOW,
        TokenType.XOR,
        *TYPE_TOKENS,
        *SUBQUERY_PREDICATES,
    }

    CONJUNCTION = {
        TokenType.AND: exp.And,
        TokenType.OR: exp.Or,
    }

    EQUALITY = {
        TokenType.EQ: exp.EQ,
        TokenType.NEQ: exp.NEQ,
        TokenType.NULLSAFE_EQ: exp.NullSafeEQ,
    }

    COMPARISON = {
        TokenType.GT: exp.GT,
        TokenType.GTE: exp.GTE,
        TokenType.LT: exp.LT,
        TokenType.LTE: exp.LTE,
    }

    BITWISE = {
        TokenType.AMP: exp.BitwiseAnd,
        TokenType.CARET: exp.BitwiseXor,
        TokenType.PIPE: exp.BitwiseOr,
        TokenType.DPIPE: exp.DPipe,
    }

    TERM = {
        TokenType.DASH: exp.Sub,
        TokenType.PLUS: exp.Add,
        TokenType.MOD: exp.Mod,
        TokenType.COLLATE: exp.Collate,
    }

    FACTOR = {
        TokenType.DIV: exp.IntDiv,
        TokenType.LR_ARROW: exp.Distance,
        TokenType.SLASH: exp.Div,
        TokenType.STAR: exp.Mul,
    }
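
    # Illustrative note (not part of the original module): the operator tables
    # above drive a precedence-climbing parse in which each table binds tighter
    # than the previous one (CONJUNCTION < EQUALITY < COMPARISON < BITWISE <
    # TERM < FACTOR). For example, "a + b * c" parses as
    #
    #     exp.Add(this=a, expression=exp.Mul(this=b, expression=c))
    #
    # because TokenType.STAR (FACTOR) binds tighter than TokenType.PLUS (TERM).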

    TIMES = {
        TokenType.TIME,
        TokenType.TIMETZ,
    }

    TIMESTAMPS = {
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
        *TIMES,
    }

    SET_OPERATIONS = {
        TokenType.UNION,
        TokenType.INTERSECT,
        TokenType.EXCEPT,
    }

    JOIN_METHODS = {
        TokenType.NATURAL,
        TokenType.ASOF,
    }

    JOIN_SIDES = {
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.FULL,
    }

    JOIN_KINDS = {
        TokenType.INNER,
        TokenType.OUTER,
        TokenType.CROSS,
        TokenType.SEMI,
        TokenType.ANTI,
    }

    JOIN_HINTS: t.Set[str] = set()

    LAMBDAS = {
        TokenType.ARROW: lambda self, expressions: self.expression(
            exp.Lambda,
            this=self._replace_lambda(
                self._parse_conjunction(),
                {node.name for node in expressions},
            ),
            expressions=expressions,
        ),
        TokenType.FARROW: lambda self, expressions: self.expression(
            exp.Kwarg,
            this=exp.var(expressions[0].name),
            expression=self._parse_conjunction(),
        ),
    }

    COLUMN_OPERATORS = {
        TokenType.DOT: None,
        TokenType.DCOLON: lambda self, this, to: self.expression(
            exp.Cast if self.STRICT_CAST else exp.TryCast,
            this=this,
            to=to,
        ),
        TokenType.ARROW: lambda self, this, path: self.expression(
            exp.JSONExtract,
            this=this,
            expression=path,
        ),
        TokenType.DARROW: lambda self, this, path: self.expression(
            exp.JSONExtractScalar,
            this=this,
            expression=path,
        ),
        TokenType.HASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtract,
            this=this,
            expression=path,
        ),
        TokenType.DHASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtractScalar,
            this=this,
            expression=path,
        ),
        TokenType.PLACEHOLDER: lambda self, this, key: self.expression(
            exp.JSONBContains,
            this=this,
            expression=key,
        ),
    }
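
    # Illustrative note (not part of the original module): COLUMN_OPERATORS maps
    # postfix/infix column operators to expression builders, e.g.
    #
    #     x::INT         ->  exp.Cast (or exp.TryCast when STRICT_CAST is False)
    #     col -> '$.a'   ->  exp.JSONExtract
    #     col ->> '$.a'  ->  exp.JSONExtractScalar
    #
    # TokenType.DOT maps to None because dotted access is handled specially by
    # the column parser rather than through a builder callable.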

    EXPRESSION_PARSERS = {
        exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
        exp.Column: lambda self: self._parse_column(),
        exp.Condition: lambda self: self._parse_conjunction(),
        exp.DataType: lambda self: self._parse_types(),
        exp.Expression: lambda self: self._parse_statement(),
        exp.From: lambda self: self._parse_from(),
        exp.Group: lambda self: self._parse_group(),
        exp.Having: lambda self: self._parse_having(),
        exp.Identifier: lambda self: self._parse_id_var(),
        exp.Join: lambda self: self._parse_join(),
        exp.Lambda: lambda self: self._parse_lambda(),
        exp.Lateral: lambda self: self._parse_lateral(),
        exp.Limit: lambda self: self._parse_limit(),
        exp.Offset: lambda self: self._parse_offset(),
        exp.Order: lambda self: self._parse_order(),
        exp.Ordered: lambda self: self._parse_ordered(),
        exp.Properties: lambda self: self._parse_properties(),
        exp.Qualify: lambda self: self._parse_qualify(),
        exp.Returning: lambda self: self._parse_returning(),
        exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY),
        exp.Table: lambda self: self._parse_table_parts(),
        exp.TableAlias: lambda self: self._parse_table_alias(),
        exp.Where: lambda self: self._parse_where(),
        exp.Window: lambda self: self._parse_named_window(),
        exp.With: lambda self: self._parse_with(),
        "JOIN_TYPE": lambda self: self._parse_join_parts(),
    }

    STATEMENT_PARSERS = {
        TokenType.ALTER: lambda self: self._parse_alter(),
        TokenType.BEGIN: lambda self: self._parse_transaction(),
        TokenType.CACHE: lambda self: self._parse_cache(),
        TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(),
        TokenType.COMMENT: lambda self: self._parse_comment(),
        TokenType.CREATE: lambda self: self._parse_create(),
        TokenType.DELETE: lambda self: self._parse_delete(),
        TokenType.DESC: lambda self: self._parse_describe(),
        TokenType.DESCRIBE: lambda self: self._parse_describe(),
        TokenType.DROP: lambda self: self._parse_drop(),
        TokenType.FROM: lambda self: exp.select("*").from_(
            t.cast(exp.From, self._parse_from(skip_from_token=True))
        ),
        TokenType.INSERT: lambda self: self._parse_insert(),
        TokenType.LOAD: lambda self: self._parse_load(),
        TokenType.MERGE: lambda self: self._parse_merge(),
        TokenType.PIVOT: lambda self: self._parse_simplified_pivot(),
        TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()),
        TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(),
        TokenType.SET: lambda self: self._parse_set(),
        TokenType.UNCACHE: lambda self: self._parse_uncache(),
        TokenType.UPDATE: lambda self: self._parse_update(),
        TokenType.USE: lambda self: self.expression(
            exp.Use,
            kind=self._match_texts(("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA"))
            and exp.var(self._prev.text),
            this=self._parse_table(schema=False),
        ),
    }

    UNARY_PARSERS = {
        TokenType.PLUS: lambda self: self._parse_unary(),  # Unary + is handled as a no-op
        TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()),
        TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()),
        TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()),
    }

    PRIMARY_PARSERS = {
        TokenType.STRING: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=True
        ),
        TokenType.NUMBER: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=False
        ),
        TokenType.STAR: lambda self, _: self.expression(
            exp.Star, **{"except": self._parse_except(), "replace": self._parse_replace()}
        ),
        TokenType.NULL: lambda self, _: self.expression(exp.Null),
        TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True),
        TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False),
        TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text),
        TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text),
        TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text),
        TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token),
        TokenType.NATIONAL_STRING: lambda self, token: self.expression(
            exp.National, this=token.text
        ),
        TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text),
        TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(),
    }
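
    # Illustrative note (not part of the original module): PRIMARY_PARSERS turns
    # single tokens into leaf expressions; each callable receives the parser and
    # the matched token, so a TokenType.NUMBER token with text "42" becomes
    #
    #     exp.Literal(this="42", is_string=False)
    #
    # STATEMENT_PARSERS above plays the same role one level up: _parse_statement
    # (defined further down) dispatches on a statement's first token.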

    PLACEHOLDER_PARSERS = {
        TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder),
        TokenType.PARAMETER: lambda self: self._parse_parameter(),
        TokenType.COLON: lambda self: self.expression(exp.Placeholder, this=self._prev.text)
        if self._match_set((TokenType.NUMBER, TokenType.VAR))
        else None,
    }

    RANGE_PARSERS = {
        TokenType.BETWEEN: lambda self, this: self._parse_between(this),
        TokenType.GLOB: binary_range_parser(exp.Glob),
        TokenType.ILIKE: binary_range_parser(exp.ILike),
        TokenType.IN: lambda self, this: self._parse_in(this),
        TokenType.IRLIKE: binary_range_parser(exp.RegexpILike),
        TokenType.IS: lambda self, this: self._parse_is(this),
        TokenType.LIKE: binary_range_parser(exp.Like),
        TokenType.OVERLAPS: binary_range_parser(exp.Overlaps),
        TokenType.RLIKE: binary_range_parser(exp.RegexpLike),
        TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo),
    }
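
    # Illustrative note (not part of the original module): RANGE_PARSERS handles
    # infix predicates. The entries built with binary_range_parser (defined at
    # module level) also consume an optional trailing ESCAPE clause, so
    #
    #     name LIKE 'a\_%' ESCAPE '\'
    #
    # parses into exp.Escape(this=exp.Like(...), expression="\").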

    PROPERTY_PARSERS: t.Dict[str, t.Callable] = {
        "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty),
        "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty),
        "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(),
        "CHARACTER SET": lambda self: self._parse_character_set(),
        "CHECKSUM": lambda self: self._parse_checksum(),
        "CLUSTER BY": lambda self: self._parse_cluster(),
        "CLUSTERED": lambda self: self._parse_clustered_by(),
        "COLLATE": lambda self: self._parse_property_assignment(exp.CollateProperty),
        "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty),
        "COPY": lambda self: self._parse_copy_property(),
        "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs),
        "DEFINER": lambda self: self._parse_definer(),
        "DETERMINISTIC": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "DISTKEY": lambda self: self._parse_distkey(),
        "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty),
        "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty),
        "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty),
        "EXTERNAL": lambda self: self.expression(exp.ExternalProperty),
        "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs),
        "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "FREESPACE": lambda self: self._parse_freespace(),
        "HEAP": lambda self: self.expression(exp.HeapProperty),
        "IMMUTABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs),
        "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty),
        "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"),
        "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"),
        "LIKE": lambda self: self._parse_create_like(),
        "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty),
        "LOCK": lambda self: self._parse_locking(),
        "LOCKING": lambda self: self._parse_locking(),
        "LOG": lambda self, **kwargs: self._parse_log(**kwargs),
        "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty),
        "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs),
        "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True),
        "NO": lambda self: self._parse_no_property(),
        "ON": lambda self: self._parse_on_property(),
        "ORDER BY": lambda self: self._parse_order(skip_order_token=True),
        "PARTITION BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED_BY": lambda self: self._parse_partitioned_by(),
        "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True),
        "RANGE": lambda self: self._parse_dict_range(this="RANGE"),
        "RETURNS": lambda self: self._parse_returns(),
        "ROW": lambda self: self._parse_row(),
        "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty),
        "SET": lambda self: self.expression(exp.SetProperty, multi=False),
        "SETTINGS": lambda self: self.expression(
            exp.SettingsProperty, expressions=self._parse_csv(self._parse_set_item)
        ),
        "SORTKEY": lambda self: self._parse_sortkey(),
        "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"),
        "STABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("STABLE")
        ),
        "STORED": lambda self: self._parse_stored(),
        "TBLPROPERTIES": lambda self: self._parse_wrapped_csv(self._parse_property),
        "TEMP": lambda self: self.expression(exp.TemporaryProperty),
        "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty),
        "TO": lambda self: self._parse_to_table(),
        "TRANSIENT": lambda self: self.expression(exp.TransientProperty),
        "TTL": lambda self: self._parse_ttl(),
        "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "VOLATILE": lambda self: self._parse_volatile_property(),
        "WITH": lambda self: self._parse_with_property(),
    }
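
    # Illustrative note (not part of the original module): PROPERTY_PARSERS is
    # keyed by the keyword(s) that introduce a DDL property, so in
    #
    #     CREATE TABLE t (a INT) ENGINE=MergeTree
    #
    # the "ENGINE" entry consumes the optional "=" and parses the value into an
    # exp.EngineProperty via _parse_property_assignment.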

    CONSTRAINT_PARSERS = {
        "AUTOINCREMENT": lambda self: self._parse_auto_increment(),
        "AUTO_INCREMENT": lambda self: self._parse_auto_increment(),
        "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False),
        "CHARACTER SET": lambda self: self.expression(
            exp.CharacterSetColumnConstraint, this=self._parse_var_or_string()
        ),
        "CHECK": lambda self: self.expression(
            exp.CheckColumnConstraint, this=self._parse_wrapped(self._parse_conjunction)
        ),
        "COLLATE": lambda self: self.expression(
            exp.CollateColumnConstraint, this=self._parse_var()
        ),
        "COMMENT": lambda self: self.expression(
            exp.CommentColumnConstraint, this=self._parse_string()
        ),
        "COMPRESS": lambda self: self._parse_compress(),
        "DEFAULT": lambda self: self.expression(
            exp.DefaultColumnConstraint, this=self._parse_bitwise()
        ),
        "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()),
        "FOREIGN KEY": lambda self: self._parse_foreign_key(),
        "FORMAT": lambda self: self.expression(
            exp.DateFormatColumnConstraint, this=self._parse_var_or_string()
        ),
        "GENERATED": lambda self: self._parse_generated_as_identity(),
        "IDENTITY": lambda self: self._parse_auto_increment(),
        "INLINE": lambda self: self._parse_inline(),
        "LIKE": lambda self: self._parse_create_like(),
        "NOT": lambda self: self._parse_not_constraint(),
        "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True),
        "ON": lambda self: self._match(TokenType.UPDATE)
        and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function()),
        "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()),
        "PRIMARY KEY": lambda self: self._parse_primary_key(),
        "REFERENCES": lambda self: self._parse_references(match=False),
        "TITLE": lambda self: self.expression(
            exp.TitleColumnConstraint, this=self._parse_var_or_string()
        ),
        "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]),
        "UNIQUE": lambda self: self._parse_unique(),
        "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint),
    }

    ALTER_PARSERS = {
        "ADD": lambda self: self._parse_alter_table_add(),
        "ALTER": lambda self: self._parse_alter_table_alter(),
        "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()),
        "DROP": lambda self: self._parse_alter_table_drop(),
        "RENAME": lambda self: self._parse_alter_table_rename(),
    }

    SCHEMA_UNNAMED_CONSTRAINTS = {"CHECK", "FOREIGN KEY", "LIKE", "PRIMARY KEY", "UNIQUE"}

    NO_PAREN_FUNCTION_PARSERS = {
        "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()),
        "CASE": lambda self: self._parse_case(),
        "IF": lambda self: self._parse_if(),
        "NEXT": lambda self: self._parse_next_value_for(),
    }

    FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"}

    FUNCTION_PARSERS = {
        "ANY_VALUE": lambda self: self._parse_any_value(),
        "CAST": lambda self: self._parse_cast(self.STRICT_CAST),
        "CONCAT": lambda self: self._parse_concat(),
        "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST),
        "DECODE": lambda self: self._parse_decode(),
        "EXTRACT": lambda self: self._parse_extract(),
        "JSON_OBJECT": lambda self: self._parse_json_object(),
        "LOG": lambda self: self._parse_logarithm(),
        "MATCH": lambda self: self._parse_match_against(),
        "OPENJSON": lambda self: self._parse_open_json(),
        "POSITION": lambda self: self._parse_position(),
        "SAFE_CAST": lambda self: self._parse_cast(False),
        "STRING_AGG": lambda self: self._parse_string_agg(),
        "SUBSTRING": lambda self: self._parse_substring(),
        "TRIM": lambda self: self._parse_trim(),
        "TRY_CAST": lambda self: self._parse_cast(False),
        "TRY_CONVERT": lambda self: self._parse_convert(False),
    }

    QUERY_MODIFIER_PARSERS = {
        TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()),
        TokenType.WHERE: lambda self: ("where", self._parse_where()),
        TokenType.GROUP_BY: lambda self: ("group", self._parse_group()),
        TokenType.HAVING: lambda self: ("having", self._parse_having()),
        TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()),
        TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()),
        TokenType.ORDER_BY: lambda self: ("order", self._parse_order()),
        TokenType.LIMIT: lambda self: ("limit", self._parse_limit()),
        TokenType.FETCH: lambda self: ("limit", self._parse_limit()),
        TokenType.OFFSET: lambda self: ("offset", self._parse_offset()),
        TokenType.FOR: lambda self: ("locks", self._parse_locks()),
        TokenType.LOCK: lambda self: ("locks", self._parse_locks()),
        TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)),
        TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)),
        TokenType.CLUSTER_BY: lambda self: (
            "cluster",
            self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
        ),
        TokenType.DISTRIBUTE_BY: lambda self: (
            "distribute",
            self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY),
        ),
        TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)),
    }

    SET_PARSERS = {
        "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"),
        "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"),
        "SESSION": lambda self: self._parse_set_item_assignment("SESSION"),
        "TRANSACTION": lambda self: self._parse_set_transaction(),
    }

    SHOW_PARSERS: t.Dict[str, t.Callable] = {}

    TYPE_LITERAL_PARSERS: t.Dict[exp.DataType.Type, t.Callable] = {}

    MODIFIABLES = (exp.Subquery, exp.Subqueryable, exp.Table)

    DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN}

    PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE}

    TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"}
    TRANSACTION_CHARACTERISTICS = {
        "ISOLATION LEVEL REPEATABLE READ",
        "ISOLATION LEVEL READ COMMITTED",
        "ISOLATION LEVEL READ UNCOMMITTED",
        "ISOLATION LEVEL SERIALIZABLE",
        "READ WRITE",
        "READ ONLY",
    }

    INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"}

    CLONE_KINDS = {"TIMESTAMP", "OFFSET", "STATEMENT"}

    TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE}

    WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS}
    WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER}
    WINDOW_SIDES = {"FOLLOWING", "PRECEDING"}

    ADD_CONSTRAINT_TOKENS = {TokenType.CONSTRAINT, TokenType.PRIMARY_KEY, TokenType.FOREIGN_KEY}

    STRICT_CAST = True

    # A NULL arg in CONCAT yields NULL by default
    CONCAT_NULL_OUTPUTS_STRING = False

    PREFIXED_PIVOT_COLUMNS = False
    IDENTIFY_PIVOT_STRINGS = False

    LOG_BASE_FIRST = True
    LOG_DEFAULTS_TO_LN = False

    __slots__ = (
        "error_level",
        "error_message_context",
        "max_errors",
        "sql",
        "errors",
        "_tokens",
        "_index",
        "_curr",
        "_next",
        "_prev",
        "_prev_comments",
    )

    # Autofilled
    INDEX_OFFSET: int = 0
    UNNEST_COLUMN_ONLY: bool = False
    ALIAS_POST_TABLESAMPLE: bool = False
    STRICT_STRING_CONCAT = False
    NORMALIZE_FUNCTIONS = "upper"
    NULL_ORDERING: str = "nulls_are_small"
    SHOW_TRIE: t.Dict = {}
    SET_TRIE: t.Dict = {}
    FORMAT_MAPPING: t.Dict[str, str] = {}
    FORMAT_TRIE: t.Dict = {}
    TIME_MAPPING: t.Dict[str, str] = {}
    TIME_TRIE: t.Dict = {}

    def __init__(
        self,
        error_level: t.Optional[ErrorLevel] = None,
        error_message_context: int = 100,
        max_errors: int = 3,
    ):
        self.error_level = error_level or ErrorLevel.IMMEDIATE
        self.error_message_context = error_message_context
        self.max_errors = max_errors
        self.reset()

    def reset(self):
        self.sql = ""
        self.errors = []
        self._tokens = []
        self._index = 0
        self._curr = None
        self._next = None
        self._prev = None
        self._prev_comments = None

    def parse(
        self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens and returns a list of syntax trees, one tree
        per parsed SQL statement.

        Args:
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The list of the produced syntax trees.
        """
        return self._parse(
            parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
        )
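
    # Illustrative usage (added for clarity; not part of the original module),
    # using the Tokenizer and ErrorLevel already imported at the top of this file:
    #
    #     sql = "SELECT a FROM t"
    #     parser = Parser(error_level=ErrorLevel.RAISE)
    #     expressions = parser.parse(Tokenizer().tokenize(sql), sql=sql)
    #     # -> a list containing a single exp.Select tree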

    def parse_into(
        self,
        expression_types: exp.IntoType,
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens into a given Expression type. If a collection of Expression
        types is given instead, this method will try to parse the token list into each one
        of them, stopping at the first for which the parsing succeeds.

        Args:
            expression_types: The expression type(s) to try and parse the token list into.
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The target Expression.
        """
        errors = []
        for expression_type in ensure_list(expression_types):
            parser = self.EXPRESSION_PARSERS.get(expression_type)
            if not parser:
                raise TypeError(f"No parser registered for {expression_type}")

            try:
                return self._parse(parser, raw_tokens, sql)
            except ParseError as e:
                e.errors[0]["into_expression"] = expression_type
                errors.append(e)

        raise ParseError(
            f"Failed to parse '{sql or raw_tokens}' into {expression_types}",
            errors=merge_errors(errors),
        ) from errors[-1]

    def _parse(
        self,
        parse_method: t.Callable[[Parser], t.Optional[exp.Expression]],
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        self.reset()
        self.sql = sql or ""

        total = len(raw_tokens)
        chunks: t.List[t.List[Token]] = [[]]

        for i, token in enumerate(raw_tokens):
            if token.token_type == TokenType.SEMICOLON:
                if i < total - 1:
                    chunks.append([])
            else:
                chunks[-1].append(token)

        expressions = []

        for tokens in chunks:
            self._index = -1
            self._tokens = tokens
            self._advance()

            expressions.append(parse_method(self))

            if self._index < len(self._tokens):
                self.raise_error("Invalid expression / Unexpected token")

            self.check_errors()

        return expressions

    def check_errors(self) -> None:
        """Logs or raises any found errors, depending on the chosen error level setting."""
        if self.error_level == ErrorLevel.WARN:
            for error in self.errors:
                logger.error(str(error))
        elif self.error_level == ErrorLevel.RAISE and self.errors:
            raise ParseError(
                concat_messages(self.errors, self.max_errors),
                errors=merge_errors(self.errors),
            )

    def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
        """
        Appends an error in the list of recorded errors or raises it, depending on the chosen
        error level setting.
        """
        token = token or self._curr or self._prev or Token.string("")
        start = token.start
        end = token.end + 1
        start_context = self.sql[max(start - self.error_message_context, 0) : start]
        highlight = self.sql[start:end]
        end_context = self.sql[end : end + self.error_message_context]

        error = ParseError.new(
            f"{message}. Line {token.line}, Col: {token.col}.\n"
            f"  {start_context}\033[4m{highlight}\033[0m{end_context}",
            description=message,
            line=token.line,
            col=token.col,
            start_context=start_context,
            highlight=highlight,
            end_context=end_context,
        )

        if self.error_level == ErrorLevel.IMMEDIATE:
            raise error

        self.errors.append(error)
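
    # Illustrative usage (added for clarity; not part of the original module):
    # parse_into targets a specific node type through EXPRESSION_PARSERS, e.g.
    #
    #     Parser().parse_into(exp.From, Tokenizer().tokenize("FROM t"), sql="FROM t")
    #
    # and, on failure, re-raises a ParseError whose error entries are annotated
    # with an "into_expression" key naming the attempted target type.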

    def expression(
        self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs
    ) -> E:
        """
        Creates a new, validated Expression.

        Args:
            exp_class: The expression class to instantiate.
            comments: An optional list of comments to attach to the expression.
            kwargs: The arguments to set for the expression along with their respective values.

        Returns:
            The target expression.
        """
        instance = exp_class(**kwargs)
        instance.add_comments(comments) if comments else self._add_comments(instance)
        return self.validate_expression(instance)

    def _add_comments(self, expression: t.Optional[exp.Expression]) -> None:
        if expression and self._prev_comments:
            expression.add_comments(self._prev_comments)
            self._prev_comments = None

    def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E:
        """
        Validates an Expression, making sure that all its mandatory arguments are set.

        Args:
            expression: The expression to validate.
            args: An optional list of items that was used to instantiate the expression, if it's a Func.

        Returns:
            The validated expression.
        """
        if self.error_level != ErrorLevel.IGNORE:
            for error_message in expression.error_messages(args):
                self.raise_error(error_message)

        return expression

    def _find_sql(self, start: Token, end: Token) -> str:
        return self.sql[start.start : end.end + 1]

    def _advance(self, times: int = 1) -> None:
        self._index += times
        self._curr = seq_get(self._tokens, self._index)
        self._next = seq_get(self._tokens, self._index + 1)

        if self._index > 0:
            self._prev = self._tokens[self._index - 1]
            self._prev_comments = self._prev.comments
        else:
            self._prev = None
            self._prev_comments = None

    def _retreat(self, index: int) -> None:
        if index != self._index:
            self._advance(index - self._index)
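
    # Illustrative note (not part of the original module): expression() above is
    # the single construction point for parsed nodes, so comment attachment and
    # validation happen uniformly. Under the default ErrorLevel.IMMEDIATE,
    #
    #     self.expression(exp.Not)  # exp.Not requires a `this` argument
    #
    # would raise a ParseError via validate_expression/raise_error instead of
    # silently producing an invalid node.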
"VOLUME"): 1115 return self.expression( 1116 exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string() 1117 ) 1118 1119 return this 1120 1121 expressions = self._parse_csv(_parse_ttl_action) 1122 where = self._parse_where() 1123 group = self._parse_group() 1124 1125 aggregates = None 1126 if group and self._match(TokenType.SET): 1127 aggregates = self._parse_csv(self._parse_set_item) 1128 1129 return self.expression( 1130 exp.MergeTreeTTL, 1131 expressions=expressions, 1132 where=where, 1133 group=group, 1134 aggregates=aggregates, 1135 ) 1136 1137 def _parse_statement(self) -> t.Optional[exp.Expression]: 1138 if self._curr is None: 1139 return None 1140 1141 if self._match_set(self.STATEMENT_PARSERS): 1142 return self.STATEMENT_PARSERS[self._prev.token_type](self) 1143 1144 if self._match_set(Tokenizer.COMMANDS): 1145 return self._parse_command() 1146 1147 expression = self._parse_expression() 1148 expression = self._parse_set_operations(expression) if expression else self._parse_select() 1149 return self._parse_query_modifiers(expression) 1150 1151 def _parse_drop(self) -> exp.Drop | exp.Command: 1152 start = self._prev 1153 temporary = self._match(TokenType.TEMPORARY) 1154 materialized = self._match_text_seq("MATERIALIZED") 1155 1156 kind = self._match_set(self.CREATABLES) and self._prev.text 1157 if not kind: 1158 return self._parse_as_command(start) 1159 1160 return self.expression( 1161 exp.Drop, 1162 comments=start.comments, 1163 exists=self._parse_exists(), 1164 this=self._parse_table(schema=True), 1165 kind=kind, 1166 temporary=temporary, 1167 materialized=materialized, 1168 cascade=self._match_text_seq("CASCADE"), 1169 constraints=self._match_text_seq("CONSTRAINTS"), 1170 purge=self._match_text_seq("PURGE"), 1171 ) 1172 1173 def _parse_exists(self, not_: bool = False) -> t.Optional[bool]: 1174 return ( 1175 self._match_text_seq("IF") 1176 and (not not_ or self._match(TokenType.NOT)) 1177 and self._match(TokenType.EXISTS) 1178 ) 1179 1180 def _parse_create(self) -> exp.Create | exp.Command: 1181 # Note: this can't be None because we've matched a statement parser 1182 start = self._prev 1183 comments = self._prev_comments 1184 1185 replace = start.text.upper() == "REPLACE" or self._match_pair( 1186 TokenType.OR, TokenType.REPLACE 1187 ) 1188 unique = self._match(TokenType.UNIQUE) 1189 1190 if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False): 1191 self._advance() 1192 1193 properties = None 1194 create_token = self._match_set(self.CREATABLES) and self._prev 1195 1196 if not create_token: 1197 # exp.Properties.Location.POST_CREATE 1198 properties = self._parse_properties() 1199 create_token = self._match_set(self.CREATABLES) and self._prev 1200 1201 if not properties or not create_token: 1202 return self._parse_as_command(start) 1203 1204 exists = self._parse_exists(not_=True) 1205 this = None 1206 expression: t.Optional[exp.Expression] = None 1207 indexes = None 1208 no_schema_binding = None 1209 begin = None 1210 clone = None 1211 1212 def extend_props(temp_props: t.Optional[exp.Properties]) -> None: 1213 nonlocal properties 1214 if properties and temp_props: 1215 properties.expressions.extend(temp_props.expressions) 1216 elif temp_props: 1217 properties = temp_props 1218 1219 if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1220 this = self._parse_user_defined_function(kind=create_token.token_type) 1221 1222 # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature) 1223 

    def _parse_create(self) -> exp.Create | exp.Command:
        # Note: this can't be None because we've matched a statement parser
        start = self._prev
        comments = self._prev_comments

        replace = start.text.upper() == "REPLACE" or self._match_pair(
            TokenType.OR, TokenType.REPLACE
        )
        unique = self._match(TokenType.UNIQUE)

        if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False):
            self._advance()

        properties = None
        create_token = self._match_set(self.CREATABLES) and self._prev

        if not create_token:
            # exp.Properties.Location.POST_CREATE
            properties = self._parse_properties()
            create_token = self._match_set(self.CREATABLES) and self._prev

            if not properties or not create_token:
                return self._parse_as_command(start)

        exists = self._parse_exists(not_=True)
        this = None
        expression: t.Optional[exp.Expression] = None
        indexes = None
        no_schema_binding = None
        begin = None
        clone = None

        def extend_props(temp_props: t.Optional[exp.Properties]) -> None:
            nonlocal properties
            if properties and temp_props:
                properties.expressions.extend(temp_props.expressions)
            elif temp_props:
                properties = temp_props

        if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=create_token.token_type)

            # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature)
            extend_props(self._parse_properties())

            self._match(TokenType.ALIAS)

            if self._match(TokenType.COMMAND):
                expression = self._parse_as_command(self._prev)
            else:
                begin = self._match(TokenType.BEGIN)
                return_ = self._match_text_seq("RETURN")
                expression = self._parse_statement()

                if return_:
                    expression = self.expression(exp.Return, this=expression)
        elif create_token.token_type == TokenType.INDEX:
            this = self._parse_index(index=self._parse_id_var())
        elif create_token.token_type in self.DB_CREATABLES:
            table_parts = self._parse_table_parts(schema=True)

            # exp.Properties.Location.POST_NAME
            self._match(TokenType.COMMA)
            extend_props(self._parse_properties(before=True))

            this = self._parse_schema(this=table_parts)

            # exp.Properties.Location.POST_SCHEMA and POST_WITH
            extend_props(self._parse_properties())

            self._match(TokenType.ALIAS)
            if not self._match_set(self.DDL_SELECT_TOKENS, advance=False):
                # exp.Properties.Location.POST_ALIAS
                extend_props(self._parse_properties())

            expression = self._parse_ddl_select()

            if create_token.token_type == TokenType.TABLE:
                # exp.Properties.Location.POST_EXPRESSION
                extend_props(self._parse_properties())

                indexes = []
                while True:
                    index = self._parse_index()

                    # exp.Properties.Location.POST_INDEX
                    extend_props(self._parse_properties())

                    if not index:
                        break
                    else:
                        self._match(TokenType.COMMA)
                        indexes.append(index)
            elif create_token.token_type == TokenType.VIEW:
                if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"):
                    no_schema_binding = True

        if self._match_text_seq("CLONE"):
            clone = self._parse_table(schema=True)
            when = self._match_texts({"AT", "BEFORE"}) and self._prev.text.upper()
            clone_kind = (
                self._match(TokenType.L_PAREN)
                and self._match_texts(self.CLONE_KINDS)
                and self._prev.text.upper()
            )
            clone_expression = self._match(TokenType.FARROW) and self._parse_bitwise()
            self._match(TokenType.R_PAREN)
            clone = self.expression(
                exp.Clone, this=clone, when=when, kind=clone_kind, expression=clone_expression
            )

        return self.expression(
            exp.Create,
            comments=comments,
            this=this,
            kind=create_token.text,
            replace=replace,
            unique=unique,
            expression=expression,
            exists=exists,
            properties=properties,
            indexes=indexes,
            no_schema_binding=no_schema_binding,
            begin=begin,
            clone=clone,
        )

    def _parse_property_before(self) -> t.Optional[exp.Expression]:
        # only used for teradata currently
        self._match(TokenType.COMMA)

        kwargs = {
            "no": self._match_text_seq("NO"),
            "dual": self._match_text_seq("DUAL"),
            "before": self._match_text_seq("BEFORE"),
            "default": self._match_text_seq("DEFAULT"),
            "local": (self._match_text_seq("LOCAL") and "LOCAL")
            or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"),
            "after": self._match_text_seq("AFTER"),
            "minimum": self._match_texts(("MIN", "MINIMUM")),
            "maximum": self._match_texts(("MAX", "MAXIMUM")),
        }

        if self._match_texts(self.PROPERTY_PARSERS):
            parser = self.PROPERTY_PARSERS[self._prev.text.upper()]
            try:
                return parser(self, **{k: v for k, v in kwargs.items() if v})
            except TypeError:
                self.raise_error(f"Cannot parse property '{self._prev.text}'")

        return None

    def _parse_property(self) -> t.Optional[exp.Expression]:
        if self._match_texts(self.PROPERTY_PARSERS):
            return self.PROPERTY_PARSERS[self._prev.text.upper()](self)

        if self._match_pair(TokenType.DEFAULT, TokenType.CHARACTER_SET):
            return self._parse_character_set(default=True)

        if self._match_text_seq("COMPOUND", "SORTKEY"):
            return self._parse_sortkey(compound=True)

        if self._match_text_seq("SQL", "SECURITY"):
            return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER"))

        assignment = self._match_pair(
            TokenType.VAR, TokenType.EQ, advance=False
        ) or self._match_pair(TokenType.STRING, TokenType.EQ, advance=False)

        if assignment:
            key = self._parse_var_or_string()
            self._match(TokenType.EQ)
            return self.expression(exp.Property, this=key, value=self._parse_column())

        return None

    def _parse_stored(self) -> exp.FileFormatProperty:
        self._match(TokenType.ALIAS)

        input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None
        output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None

        return self.expression(
            exp.FileFormatProperty,
            this=self.expression(
                exp.InputOutputFormat, input_format=input_format, output_format=output_format
            )
            if input_format or output_format
            else self._parse_var_or_string() or self._parse_number() or self._parse_id_var(),
        )

    def _parse_property_assignment(self, exp_class: t.Type[E]) -> E:
        self._match(TokenType.EQ)
        self._match(TokenType.ALIAS)
        return self.expression(exp_class, this=self._parse_field())

    def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]:
        properties = []
        while True:
            if before:
                prop = self._parse_property_before()
            else:
                prop = self._parse_property()

            if not prop:
                break
            for p in ensure_list(prop):
                properties.append(p)

        if properties:
            return self.expression(exp.Properties, expressions=properties)

        return None

    def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty:
        return self.expression(
            exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION")
        )

    def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty:
        if self._index >= 2:
            pre_volatile_token = self._tokens[self._index - 2]
        else:
            pre_volatile_token = None

        if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS:
            return exp.VolatileProperty()

        return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE"))

    def _parse_with_property(
        self,
    ) -> t.Optional[exp.Expression] | t.List[t.Optional[exp.Expression]]:
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_property)

        if self._match_text_seq("JOURNAL"):
            return self._parse_withjournaltable()

        if self._match_text_seq("DATA"):
            return self._parse_withdata(no=False)
        elif self._match_text_seq("NO", "DATA"):
            return self._parse_withdata(no=True)

        if not self._next:
            return None

        return self._parse_withisolatedloading()

    # https://dev.mysql.com/doc/refman/8.0/en/create-view.html
    def _parse_definer(self) -> t.Optional[exp.DefinerProperty]:
        self._match(TokenType.EQ)

        user = self._parse_id_var()
        self._match(TokenType.PARAMETER)
        host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text)

        if not user or not host:
            return None

        return exp.DefinerProperty(this=f"{user}@{host}")

    def _parse_withjournaltable(self) -> exp.WithJournalTableProperty:
        self._match(TokenType.TABLE)
        self._match(TokenType.EQ)
        return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts())

    def _parse_log(self, no: bool = False) -> exp.LogProperty:
        return self.expression(exp.LogProperty, no=no)

    def _parse_journal(self, **kwargs) -> exp.JournalProperty:
        return self.expression(exp.JournalProperty, **kwargs)

    def _parse_checksum(self) -> exp.ChecksumProperty:
        self._match(TokenType.EQ)

        on = None
        if self._match(TokenType.ON):
            on = True
        elif self._match_text_seq("OFF"):
            on = False

        return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT))

    def _parse_cluster(self) -> exp.Cluster:
        return self.expression(exp.Cluster, expressions=self._parse_csv(self._parse_ordered))

    def _parse_clustered_by(self) -> exp.ClusteredByProperty:
        self._match_text_seq("BY")

        self._match_l_paren()
        expressions = self._parse_csv(self._parse_column)
        self._match_r_paren()

        if self._match_text_seq("SORTED", "BY"):
            self._match_l_paren()
            sorted_by = self._parse_csv(self._parse_ordered)
            self._match_r_paren()
        else:
            sorted_by = None

        self._match(TokenType.INTO)
        buckets = self._parse_number()
        self._match_text_seq("BUCKETS")

        return self.expression(
            exp.ClusteredByProperty,
            expressions=expressions,
            sorted_by=sorted_by,
            buckets=buckets,
        )

    def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]:
        if not self._match_text_seq("GRANTS"):
            self._retreat(self._index - 1)
            return None

        return self.expression(exp.CopyGrantsProperty)

    def _parse_freespace(self) -> exp.FreespaceProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT)
        )

    def _parse_mergeblockratio(
        self, no: bool = False, default: bool = False
    ) -> exp.MergeBlockRatioProperty:
        if self._match(TokenType.EQ):
            return self.expression(
                exp.MergeBlockRatioProperty,
                this=self._parse_number(),
                percent=self._match(TokenType.PERCENT),
            )

        return self.expression(exp.MergeBlockRatioProperty, no=no, default=default)

    def _parse_datablocksize(
        self,
        default: t.Optional[bool] = None,
        minimum: t.Optional[bool] = None,
        maximum: t.Optional[bool] = None,
    ) -> exp.DataBlocksizeProperty:
        self._match(TokenType.EQ)
        size = self._parse_number()

        units = None
        if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")):
            units = self._prev.text

        return self.expression(
            exp.DataBlocksizeProperty,
            size=size,
            units=units,
            default=default,
            minimum=minimum,
            maximum=maximum,
        )

    def _parse_blockcompression(self) -> exp.BlockCompressionProperty:
        self._match(TokenType.EQ)
        always = self._match_text_seq("ALWAYS")
        manual = self._match_text_seq("MANUAL")
        never = self._match_text_seq("NEVER")
        default = self._match_text_seq("DEFAULT")

        autotemp = None
        if self._match_text_seq("AUTOTEMP"):
            autotemp = self._parse_schema()

        return self.expression(
            exp.BlockCompressionProperty,
            always=always,
            manual=manual,
            never=never,
            default=default,
            autotemp=autotemp,
        )

    def _parse_withisolatedloading(self) -> exp.IsolatedLoadingProperty:
        no = self._match_text_seq("NO")
        concurrent = self._match_text_seq("CONCURRENT")
        self._match_text_seq("ISOLATED", "LOADING")
        for_all = self._match_text_seq("FOR", "ALL")
        for_insert = self._match_text_seq("FOR", "INSERT")
        for_none = self._match_text_seq("FOR", "NONE")
        return self.expression(
            exp.IsolatedLoadingProperty,
            no=no,
            concurrent=concurrent,
            for_all=for_all,
            for_insert=for_insert,
            for_none=for_none,
        )

    def _parse_locking(self) -> exp.LockingProperty:
        if self._match(TokenType.TABLE):
            kind = "TABLE"
        elif self._match(TokenType.VIEW):
            kind = "VIEW"
        elif self._match(TokenType.ROW):
            kind = "ROW"
        elif self._match_text_seq("DATABASE"):
            kind = "DATABASE"
        else:
            kind = None

        if kind in ("DATABASE", "TABLE", "VIEW"):
            this = self._parse_table_parts()
        else:
            this = None

        if self._match(TokenType.FOR):
            for_or_in = "FOR"
        elif self._match(TokenType.IN):
            for_or_in = "IN"
        else:
            for_or_in = None

        if self._match_text_seq("ACCESS"):
            lock_type = "ACCESS"
        elif self._match_texts(("EXCL", "EXCLUSIVE")):
            lock_type = "EXCLUSIVE"
        elif self._match_text_seq("SHARE"):
            lock_type = "SHARE"
        elif self._match_text_seq("READ"):
            lock_type = "READ"
        elif self._match_text_seq("WRITE"):
            lock_type = "WRITE"
        elif self._match_text_seq("CHECKSUM"):
            lock_type = "CHECKSUM"
        else:
            lock_type = None

        override = self._match_text_seq("OVERRIDE")

        return self.expression(
            exp.LockingProperty,
            this=this,
            kind=kind,
            for_or_in=for_or_in,
            lock_type=lock_type,
            override=override,
        )

    def _parse_partition_by(self) -> t.List[t.Optional[exp.Expression]]:
        if self._match(TokenType.PARTITION_BY):
            return self._parse_csv(self._parse_conjunction)
        return []

    def _parse_partitioned_by(self) -> exp.PartitionedByProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.PartitionedByProperty,
            this=self._parse_schema() or self._parse_bracket(self._parse_field()),
        )

    def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty:
        if self._match_text_seq("AND", "STATISTICS"):
            statistics = True
        elif self._match_text_seq("AND", "NO", "STATISTICS"):
            statistics = False
        else:
            statistics = None

        return self.expression(exp.WithDataProperty, no=no, statistics=statistics)

    def _parse_no_property(self) -> t.Optional[exp.NoPrimaryIndexProperty]:
        if self._match_text_seq("PRIMARY", "INDEX"):
            return exp.NoPrimaryIndexProperty()
        return None

    def _parse_on_property(self) -> t.Optional[exp.Expression]:
        if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"):
            return exp.OnCommitProperty()
        elif self._match_text_seq("COMMIT", "DELETE", "ROWS"):
            return exp.OnCommitProperty(delete=True)
        return None

    def _parse_distkey(self) -> exp.DistKeyProperty:
        return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var))

    def _parse_create_like(self) -> t.Optional[exp.LikeProperty]:
        table = self._parse_table(schema=True)

        options = []
        while self._match_texts(("INCLUDING", "EXCLUDING")):
            this = self._prev.text.upper()

            id_var = self._parse_id_var()
            if not id_var:
                return None

            options.append(
                self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper()))
            )

        return self.expression(exp.LikeProperty, this=table, expressions=options)

    def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty:
        return self.expression(
            exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound
        )

    def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default
        )

    def _parse_returns(self) -> exp.ReturnsProperty:
        value: t.Optional[exp.Expression]
        is_table = self._match(TokenType.TABLE)

        if is_table:
            if self._match(TokenType.LT):
                value = self.expression(
                    exp.Schema,
                    this="TABLE",
                    expressions=self._parse_csv(self._parse_struct_types),
                )
                if not self._match(TokenType.GT):
                    self.raise_error("Expecting >")
            else:
                value = self._parse_schema(exp.var("TABLE"))
        else:
            value = self._parse_types()

        return self.expression(exp.ReturnsProperty, this=value, is_table=is_table)

    def _parse_describe(self) -> exp.Describe:
        kind = self._match_set(self.CREATABLES) and self._prev.text
        this = self._parse_table()
        return self.expression(exp.Describe, this=this, kind=kind)

    def _parse_insert(self) -> exp.Insert:
        comments = ensure_list(self._prev_comments)
        overwrite = self._match(TokenType.OVERWRITE)
        ignore = self._match(TokenType.IGNORE)
        local = self._match_text_seq("LOCAL")
        alternative = None

        if self._match_text_seq("DIRECTORY"):
            this: t.Optional[exp.Expression] = self.expression(
                exp.Directory,
                this=self._parse_var_or_string(),
                local=local,
                row_format=self._parse_row_format(match_row=True),
            )
        else:
            if self._match(TokenType.OR):
                alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text

            self._match(TokenType.INTO)
            comments += ensure_list(self._prev_comments)
            self._match(TokenType.TABLE)
            this = self._parse_table(schema=True)

        returning = self._parse_returning()

        return self.expression(
            exp.Insert,
            comments=comments,
            this=this,
            exists=self._parse_exists(),
            partition=self._parse_partition(),
            where=self._match_pair(TokenType.REPLACE, TokenType.WHERE)
            and self._parse_conjunction(),
            expression=self._parse_ddl_select(),
            conflict=self._parse_on_conflict(),
            returning=returning or self._parse_returning(),
            overwrite=overwrite,
            alternative=alternative,
            ignore=ignore,
        )
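
    # Illustrative note (not part of the original module): _parse_insert covers
    # several dialect variants from a single entry point, e.g.
    #
    #     INSERT OVERWRITE TABLE t SELECT ...  ->  overwrite=True
    #     INSERT OR REPLACE INTO t VALUES ...  ->  alternative="REPLACE"
    #
    # where the accepted OR-alternatives come from INSERT_ALTERNATIVES above.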
constraint = self._parse_id_var() 1771 else: 1772 key = self._parse_csv(self._parse_value) 1773 1774 self._match_text_seq("DO") 1775 if self._match_text_seq("NOTHING"): 1776 nothing = True 1777 else: 1778 self._match(TokenType.UPDATE) 1779 self._match(TokenType.SET) 1780 expressions = self._parse_csv(self._parse_equality) 1781 1782 return self.expression( 1783 exp.OnConflict, 1784 duplicate=duplicate, 1785 expressions=expressions, 1786 nothing=nothing, 1787 key=key, 1788 constraint=constraint, 1789 ) 1790 1791 def _parse_returning(self) -> t.Optional[exp.Returning]: 1792 if not self._match(TokenType.RETURNING): 1793 return None 1794 return self.expression( 1795 exp.Returning, 1796 expressions=self._parse_csv(self._parse_expression), 1797 into=self._match(TokenType.INTO) and self._parse_table_part(), 1798 ) 1799 1800 def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 1801 if not self._match(TokenType.FORMAT): 1802 return None 1803 return self._parse_row_format() 1804 1805 def _parse_row_format( 1806 self, match_row: bool = False 1807 ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 1808 if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT): 1809 return None 1810 1811 if self._match_text_seq("SERDE"): 1812 this = self._parse_string() 1813 1814 serde_properties = None 1815 if self._match(TokenType.SERDE_PROPERTIES): 1816 serde_properties = self.expression( 1817 exp.SerdeProperties, expressions=self._parse_wrapped_csv(self._parse_property) 1818 ) 1819 1820 return self.expression( 1821 exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties 1822 ) 1823 1824 self._match_text_seq("DELIMITED") 1825 1826 kwargs = {} 1827 1828 if self._match_text_seq("FIELDS", "TERMINATED", "BY"): 1829 kwargs["fields"] = self._parse_string() 1830 if self._match_text_seq("ESCAPED", "BY"): 1831 kwargs["escaped"] = self._parse_string() 1832 if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"): 1833 kwargs["collection_items"] = self._parse_string() 1834 if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"): 1835 kwargs["map_keys"] = self._parse_string() 1836 if self._match_text_seq("LINES", "TERMINATED", "BY"): 1837 kwargs["lines"] = self._parse_string() 1838 if self._match_text_seq("NULL", "DEFINED", "AS"): 1839 kwargs["null"] = self._parse_string() 1840 1841 return self.expression(exp.RowFormatDelimitedProperty, **kwargs) # type: ignore 1842 1843 def _parse_load(self) -> exp.LoadData | exp.Command: 1844 if self._match_text_seq("DATA"): 1845 local = self._match_text_seq("LOCAL") 1846 self._match_text_seq("INPATH") 1847 inpath = self._parse_string() 1848 overwrite = self._match(TokenType.OVERWRITE) 1849 self._match_pair(TokenType.INTO, TokenType.TABLE) 1850 1851 return self.expression( 1852 exp.LoadData, 1853 this=self._parse_table(schema=True), 1854 local=local, 1855 overwrite=overwrite, 1856 inpath=inpath, 1857 partition=self._parse_partition(), 1858 input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(), 1859 serde=self._match_text_seq("SERDE") and self._parse_string(), 1860 ) 1861 return self._parse_as_command(self._prev) 1862 1863 def _parse_delete(self) -> exp.Delete: 1864 # This handles MySQL's "Multiple-Table Syntax" 1865 # https://dev.mysql.com/doc/refman/8.0/en/delete.html 1866 tables = None 1867 comments = self._prev_comments 1868 if not self._match(TokenType.FROM, advance=False): 1869 tables = self._parse_csv(self._parse_table) or None 1870 1871 returning = 
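
For illustration, _parse_delete (below) funnels an optional RETURNING clause through _parse_returning, so the clause is available on the parsed statement. A minimal check against toy SQL via the public API:

    >>> import sqlglot
    >>> from sqlglot import exp
    >>> tree = sqlglot.parse_one("DELETE FROM t WHERE x = 1 RETURNING id", read="postgres")
    >>> isinstance(tree, exp.Delete), isinstance(tree.args["returning"], exp.Returning)
    (True, True)
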
self._parse_returning() 1872 1873 return self.expression( 1874 exp.Delete, 1875 comments=comments, 1876 tables=tables, 1877 this=self._match(TokenType.FROM) and self._parse_table(joins=True), 1878 using=self._match(TokenType.USING) and self._parse_table(joins=True), 1879 where=self._parse_where(), 1880 returning=returning or self._parse_returning(), 1881 limit=self._parse_limit(), 1882 ) 1883 1884 def _parse_update(self) -> exp.Update: 1885 comments = self._prev_comments 1886 this = self._parse_table(alias_tokens=self.UPDATE_ALIAS_TOKENS) 1887 expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality) 1888 returning = self._parse_returning() 1889 return self.expression( 1890 exp.Update, 1891 comments=comments, 1892 **{ # type: ignore 1893 "this": this, 1894 "expressions": expressions, 1895 "from": self._parse_from(joins=True), 1896 "where": self._parse_where(), 1897 "returning": returning or self._parse_returning(), 1898 "limit": self._parse_limit(), 1899 }, 1900 ) 1901 1902 def _parse_uncache(self) -> exp.Uncache: 1903 if not self._match(TokenType.TABLE): 1904 self.raise_error("Expecting TABLE after UNCACHE") 1905 1906 return self.expression( 1907 exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True) 1908 ) 1909 1910 def _parse_cache(self) -> exp.Cache: 1911 lazy = self._match_text_seq("LAZY") 1912 self._match(TokenType.TABLE) 1913 table = self._parse_table(schema=True) 1914 1915 options = [] 1916 if self._match_text_seq("OPTIONS"): 1917 self._match_l_paren() 1918 k = self._parse_string() 1919 self._match(TokenType.EQ) 1920 v = self._parse_string() 1921 options = [k, v] 1922 self._match_r_paren() 1923 1924 self._match(TokenType.ALIAS) 1925 return self.expression( 1926 exp.Cache, 1927 this=table, 1928 lazy=lazy, 1929 options=options, 1930 expression=self._parse_select(nested=True), 1931 ) 1932 1933 def _parse_partition(self) -> t.Optional[exp.Partition]: 1934 if not self._match(TokenType.PARTITION): 1935 return None 1936 1937 return self.expression( 1938 exp.Partition, expressions=self._parse_wrapped_csv(self._parse_conjunction) 1939 ) 1940 1941 def _parse_value(self) -> exp.Tuple: 1942 if self._match(TokenType.L_PAREN): 1943 expressions = self._parse_csv(self._parse_conjunction) 1944 self._match_r_paren() 1945 return self.expression(exp.Tuple, expressions=expressions) 1946 1947 # In presto we can have VALUES 1, 2 which results in 1 column & 2 rows. 
1948 # https://prestodb.io/docs/current/sql/values.html 1949 return self.expression(exp.Tuple, expressions=[self._parse_conjunction()]) 1950 1951 def _parse_projections(self) -> t.List[t.Optional[exp.Expression]]: 1952 return self._parse_expressions() 1953 1954 def _parse_select( 1955 self, nested: bool = False, table: bool = False, parse_subquery_alias: bool = True 1956 ) -> t.Optional[exp.Expression]: 1957 cte = self._parse_with() 1958 if cte: 1959 this = self._parse_statement() 1960 1961 if not this: 1962 self.raise_error("Failed to parse any statement following CTE") 1963 return cte 1964 1965 if "with" in this.arg_types: 1966 this.set("with", cte) 1967 else: 1968 self.raise_error(f"{this.key} does not support CTE") 1969 this = cte 1970 elif self._match(TokenType.SELECT): 1971 comments = self._prev_comments 1972 1973 hint = self._parse_hint() 1974 all_ = self._match(TokenType.ALL) 1975 distinct = self._match(TokenType.DISTINCT) 1976 1977 kind = ( 1978 self._match(TokenType.ALIAS) 1979 and self._match_texts(("STRUCT", "VALUE")) 1980 and self._prev.text 1981 ) 1982 1983 if distinct: 1984 distinct = self.expression( 1985 exp.Distinct, 1986 on=self._parse_value() if self._match(TokenType.ON) else None, 1987 ) 1988 1989 if all_ and distinct: 1990 self.raise_error("Cannot specify both ALL and DISTINCT after SELECT") 1991 1992 limit = self._parse_limit(top=True) 1993 projections = self._parse_projections() 1994 1995 this = self.expression( 1996 exp.Select, 1997 kind=kind, 1998 hint=hint, 1999 distinct=distinct, 2000 expressions=projections, 2001 limit=limit, 2002 ) 2003 this.comments = comments 2004 2005 into = self._parse_into() 2006 if into: 2007 this.set("into", into) 2008 2009 from_ = self._parse_from() 2010 if from_: 2011 this.set("from", from_) 2012 2013 this = self._parse_query_modifiers(this) 2014 elif (table or nested) and self._match(TokenType.L_PAREN): 2015 if self._match(TokenType.PIVOT): 2016 this = self._parse_simplified_pivot() 2017 elif self._match(TokenType.FROM): 2018 this = exp.select("*").from_( 2019 t.cast(exp.From, self._parse_from(skip_from_token=True)) 2020 ) 2021 else: 2022 this = self._parse_table() if table else self._parse_select(nested=True) 2023 this = self._parse_set_operations(self._parse_query_modifiers(this)) 2024 2025 self._match_r_paren() 2026 2027 # We return early here so that the UNION isn't attached to the subquery by the 2028 # following call to _parse_set_operations, but instead becomes the parent node 2029 return self._parse_subquery(this, parse_alias=parse_subquery_alias) 2030 elif self._match(TokenType.VALUES): 2031 this = self.expression( 2032 exp.Values, 2033 expressions=self._parse_csv(self._parse_value), 2034 alias=self._parse_table_alias(), 2035 ) 2036 else: 2037 this = None 2038 2039 return self._parse_set_operations(this) 2040 2041 def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]: 2042 if not skip_with_token and not self._match(TokenType.WITH): 2043 return None 2044 2045 comments = self._prev_comments 2046 recursive = self._match(TokenType.RECURSIVE) 2047 2048 expressions = [] 2049 while True: 2050 expressions.append(self._parse_cte()) 2051 2052 if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH): 2053 break 2054 else: 2055 self._match(TokenType.WITH) 2056 2057 return self.expression( 2058 exp.With, comments=comments, expressions=expressions, recursive=recursive 2059 ) 2060 2061 def _parse_cte(self) -> exp.CTE: 2062 alias = self._parse_table_alias() 2063 if not alias or not alias.this: 2064 
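
_parse_select (above) attaches a leading WITH to whichever statement follows it, so the CTE ends up in the statement's "with" arg. A small sketch with the public API and toy SQL:

    >>> import sqlglot
    >>> from sqlglot import exp
    >>> tree = sqlglot.parse_one("WITH x AS (SELECT 1 AS a) SELECT a FROM x")
    >>> isinstance(tree, exp.Select), isinstance(tree.args["with"], exp.With)
    (True, True)
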
self.raise_error("Expected CTE to have alias") 2065 2066 self._match(TokenType.ALIAS) 2067 return self.expression( 2068 exp.CTE, this=self._parse_wrapped(self._parse_statement), alias=alias 2069 ) 2070 2071 def _parse_table_alias( 2072 self, alias_tokens: t.Optional[t.Collection[TokenType]] = None 2073 ) -> t.Optional[exp.TableAlias]: 2074 any_token = self._match(TokenType.ALIAS) 2075 alias = ( 2076 self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 2077 or self._parse_string_as_identifier() 2078 ) 2079 2080 index = self._index 2081 if self._match(TokenType.L_PAREN): 2082 columns = self._parse_csv(self._parse_function_parameter) 2083 self._match_r_paren() if columns else self._retreat(index) 2084 else: 2085 columns = None 2086 2087 if not alias and not columns: 2088 return None 2089 2090 return self.expression(exp.TableAlias, this=alias, columns=columns) 2091 2092 def _parse_subquery( 2093 self, this: t.Optional[exp.Expression], parse_alias: bool = True 2094 ) -> t.Optional[exp.Subquery]: 2095 if not this: 2096 return None 2097 2098 return self.expression( 2099 exp.Subquery, 2100 this=this, 2101 pivots=self._parse_pivots(), 2102 alias=self._parse_table_alias() if parse_alias else None, 2103 ) 2104 2105 def _parse_query_modifiers( 2106 self, this: t.Optional[exp.Expression] 2107 ) -> t.Optional[exp.Expression]: 2108 if isinstance(this, self.MODIFIABLES): 2109 for join in iter(self._parse_join, None): 2110 this.append("joins", join) 2111 for lateral in iter(self._parse_lateral, None): 2112 this.append("laterals", lateral) 2113 2114 while True: 2115 if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False): 2116 parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type] 2117 key, expression = parser(self) 2118 2119 if expression: 2120 this.set(key, expression) 2121 if key == "limit": 2122 offset = expression.args.pop("offset", None) 2123 if offset: 2124 this.set("offset", exp.Offset(expression=offset)) 2125 continue 2126 break 2127 return this 2128 2129 def _parse_hint(self) -> t.Optional[exp.Hint]: 2130 if self._match(TokenType.HINT): 2131 hints = [] 2132 for hint in iter(lambda: self._parse_csv(self._parse_function), []): 2133 hints.extend(hint) 2134 2135 if not self._match_pair(TokenType.STAR, TokenType.SLASH): 2136 self.raise_error("Expected */ after HINT") 2137 2138 return self.expression(exp.Hint, expressions=hints) 2139 2140 return None 2141 2142 def _parse_into(self) -> t.Optional[exp.Into]: 2143 if not self._match(TokenType.INTO): 2144 return None 2145 2146 temp = self._match(TokenType.TEMPORARY) 2147 unlogged = self._match_text_seq("UNLOGGED") 2148 self._match(TokenType.TABLE) 2149 2150 return self.expression( 2151 exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged 2152 ) 2153 2154 def _parse_from( 2155 self, joins: bool = False, skip_from_token: bool = False 2156 ) -> t.Optional[exp.From]: 2157 if not skip_from_token and not self._match(TokenType.FROM): 2158 return None 2159 2160 return self.expression( 2161 exp.From, comments=self._prev_comments, this=self._parse_table(joins=joins) 2162 ) 2163 2164 def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]: 2165 if not self._match(TokenType.MATCH_RECOGNIZE): 2166 return None 2167 2168 self._match_l_paren() 2169 2170 partition = self._parse_partition_by() 2171 order = self._parse_order() 2172 measures = self._parse_expressions() if self._match_text_seq("MEASURES") else None 2173 2174 if self._match_text_seq("ONE", "ROW", "PER", "MATCH"): 2175 rows = 
exp.var("ONE ROW PER MATCH") 2176 elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"): 2177 text = "ALL ROWS PER MATCH" 2178 if self._match_text_seq("SHOW", "EMPTY", "MATCHES"): 2179 text += f" SHOW EMPTY MATCHES" 2180 elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"): 2181 text += f" OMIT EMPTY MATCHES" 2182 elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"): 2183 text += f" WITH UNMATCHED ROWS" 2184 rows = exp.var(text) 2185 else: 2186 rows = None 2187 2188 if self._match_text_seq("AFTER", "MATCH", "SKIP"): 2189 text = "AFTER MATCH SKIP" 2190 if self._match_text_seq("PAST", "LAST", "ROW"): 2191 text += f" PAST LAST ROW" 2192 elif self._match_text_seq("TO", "NEXT", "ROW"): 2193 text += f" TO NEXT ROW" 2194 elif self._match_text_seq("TO", "FIRST"): 2195 text += f" TO FIRST {self._advance_any().text}" # type: ignore 2196 elif self._match_text_seq("TO", "LAST"): 2197 text += f" TO LAST {self._advance_any().text}" # type: ignore 2198 after = exp.var(text) 2199 else: 2200 after = None 2201 2202 if self._match_text_seq("PATTERN"): 2203 self._match_l_paren() 2204 2205 if not self._curr: 2206 self.raise_error("Expecting )", self._curr) 2207 2208 paren = 1 2209 start = self._curr 2210 2211 while self._curr and paren > 0: 2212 if self._curr.token_type == TokenType.L_PAREN: 2213 paren += 1 2214 if self._curr.token_type == TokenType.R_PAREN: 2215 paren -= 1 2216 2217 end = self._prev 2218 self._advance() 2219 2220 if paren > 0: 2221 self.raise_error("Expecting )", self._curr) 2222 2223 pattern = exp.var(self._find_sql(start, end)) 2224 else: 2225 pattern = None 2226 2227 define = ( 2228 self._parse_csv( 2229 lambda: self.expression( 2230 exp.Alias, 2231 alias=self._parse_id_var(any_token=True), 2232 this=self._match(TokenType.ALIAS) and self._parse_conjunction(), 2233 ) 2234 ) 2235 if self._match_text_seq("DEFINE") 2236 else None 2237 ) 2238 2239 self._match_r_paren() 2240 2241 return self.expression( 2242 exp.MatchRecognize, 2243 partition_by=partition, 2244 order=order, 2245 measures=measures, 2246 rows=rows, 2247 after=after, 2248 pattern=pattern, 2249 define=define, 2250 alias=self._parse_table_alias(), 2251 ) 2252 2253 def _parse_lateral(self) -> t.Optional[exp.Lateral]: 2254 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY) 2255 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY) 2256 2257 if outer_apply or cross_apply: 2258 this = self._parse_select(table=True) 2259 view = None 2260 outer = not cross_apply 2261 elif self._match(TokenType.LATERAL): 2262 this = self._parse_select(table=True) 2263 view = self._match(TokenType.VIEW) 2264 outer = self._match(TokenType.OUTER) 2265 else: 2266 return None 2267 2268 if not this: 2269 this = ( 2270 self._parse_unnest() 2271 or self._parse_function() 2272 or self._parse_id_var(any_token=False) 2273 ) 2274 2275 while self._match(TokenType.DOT): 2276 this = exp.Dot( 2277 this=this, 2278 expression=self._parse_function() or self._parse_id_var(any_token=False), 2279 ) 2280 2281 if view: 2282 table = self._parse_id_var(any_token=False) 2283 columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else [] 2284 table_alias: t.Optional[exp.TableAlias] = self.expression( 2285 exp.TableAlias, this=table, columns=columns 2286 ) 2287 elif isinstance(this, exp.Subquery) and this.alias: 2288 # Ensures parity between the Subquery's and the Lateral's "alias" args 2289 table_alias = this.args["alias"].copy() 2290 else: 2291 table_alias = self._parse_table_alias() 2292 2293 return self.expression(exp.Lateral, 
this=this, view=view, outer=outer, alias=table_alias) 2294 2295 def _parse_join_parts( 2296 self, 2297 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 2298 return ( 2299 self._match_set(self.JOIN_METHODS) and self._prev, 2300 self._match_set(self.JOIN_SIDES) and self._prev, 2301 self._match_set(self.JOIN_KINDS) and self._prev, 2302 ) 2303 2304 def _parse_join( 2305 self, skip_join_token: bool = False, parse_bracket: bool = False 2306 ) -> t.Optional[exp.Join]: 2307 if self._match(TokenType.COMMA): 2308 return self.expression(exp.Join, this=self._parse_table()) 2309 2310 index = self._index 2311 method, side, kind = self._parse_join_parts() 2312 hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None 2313 join = self._match(TokenType.JOIN) 2314 2315 if not skip_join_token and not join: 2316 self._retreat(index) 2317 kind = None 2318 method = None 2319 side = None 2320 2321 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False) 2322 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False) 2323 2324 if not skip_join_token and not join and not outer_apply and not cross_apply: 2325 return None 2326 2327 if outer_apply: 2328 side = Token(TokenType.LEFT, "LEFT") 2329 2330 kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)} 2331 2332 if method: 2333 kwargs["method"] = method.text 2334 if side: 2335 kwargs["side"] = side.text 2336 if kind: 2337 kwargs["kind"] = kind.text 2338 if hint: 2339 kwargs["hint"] = hint 2340 2341 if self._match(TokenType.ON): 2342 kwargs["on"] = self._parse_conjunction() 2343 elif self._match(TokenType.USING): 2344 kwargs["using"] = self._parse_wrapped_id_vars() 2345 elif not (kind and kind.token_type == TokenType.CROSS): 2346 index = self._index 2347 joins = self._parse_joins() 2348 2349 if joins and self._match(TokenType.ON): 2350 kwargs["on"] = self._parse_conjunction() 2351 elif joins and self._match(TokenType.USING): 2352 kwargs["using"] = self._parse_wrapped_id_vars() 2353 else: 2354 joins = None 2355 self._retreat(index) 2356 2357 kwargs["this"].set("joins", joins) 2358 2359 comments = [c for token in (method, side, kind) if token for c in token.comments] 2360 return self.expression(exp.Join, comments=comments, **kwargs) 2361 2362 def _parse_index( 2363 self, 2364 index: t.Optional[exp.Expression] = None, 2365 ) -> t.Optional[exp.Index]: 2366 if index: 2367 unique = None 2368 primary = None 2369 amp = None 2370 2371 self._match(TokenType.ON) 2372 self._match(TokenType.TABLE) # hive 2373 table = self._parse_table_parts(schema=True) 2374 else: 2375 unique = self._match(TokenType.UNIQUE) 2376 primary = self._match_text_seq("PRIMARY") 2377 amp = self._match_text_seq("AMP") 2378 2379 if not self._match(TokenType.INDEX): 2380 return None 2381 2382 index = self._parse_id_var() 2383 table = None 2384 2385 using = self._parse_field() if self._match(TokenType.USING) else None 2386 2387 if self._match(TokenType.L_PAREN, advance=False): 2388 columns = self._parse_wrapped_csv(self._parse_ordered) 2389 else: 2390 columns = None 2391 2392 return self.expression( 2393 exp.Index, 2394 this=index, 2395 table=table, 2396 using=using, 2397 columns=columns, 2398 unique=unique, 2399 primary=primary, 2400 amp=amp, 2401 partition_by=self._parse_partition_by(), 2402 ) 2403 2404 def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]: 2405 hints: t.List[exp.Expression] = [] 2406 if self._match_pair(TokenType.WITH, TokenType.L_PAREN): 2407 # 
https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16 2408 hints.append( 2409 self.expression( 2410 exp.WithTableHint, 2411 expressions=self._parse_csv( 2412 lambda: self._parse_function() or self._parse_var(any_token=True) 2413 ), 2414 ) 2415 ) 2416 self._match_r_paren() 2417 else: 2418 # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html 2419 while self._match_set(self.TABLE_INDEX_HINT_TOKENS): 2420 hint = exp.IndexTableHint(this=self._prev.text.upper()) 2421 2422 self._match_texts({"INDEX", "KEY"}) 2423 if self._match(TokenType.FOR): 2424 hint.set("target", self._advance_any() and self._prev.text.upper()) 2425 2426 hint.set("expressions", self._parse_wrapped_id_vars()) 2427 hints.append(hint) 2428 2429 return hints or None 2430 2431 def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]: 2432 return ( 2433 (not schema and self._parse_function(optional_parens=False)) 2434 or self._parse_id_var(any_token=False) 2435 or self._parse_string_as_identifier() 2436 or self._parse_placeholder() 2437 ) 2438 2439 def _parse_table_parts(self, schema: bool = False) -> exp.Table: 2440 catalog = None 2441 db = None 2442 table = self._parse_table_part(schema=schema) 2443 2444 while self._match(TokenType.DOT): 2445 if catalog: 2446 # This allows nesting the table in arbitrarily many dot expressions if needed 2447 table = self.expression( 2448 exp.Dot, this=table, expression=self._parse_table_part(schema=schema) 2449 ) 2450 else: 2451 catalog = db 2452 db = table 2453 table = self._parse_table_part(schema=schema) 2454 2455 if not table: 2456 self.raise_error(f"Expected table name but got {self._curr}") 2457 2458 return self.expression( 2459 exp.Table, this=table, db=db, catalog=catalog, pivots=self._parse_pivots() 2460 ) 2461 2462 def _parse_table( 2463 self, 2464 schema: bool = False, 2465 joins: bool = False, 2466 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 2467 parse_bracket: bool = False, 2468 ) -> t.Optional[exp.Expression]: 2469 lateral = self._parse_lateral() 2470 if lateral: 2471 return lateral 2472 2473 unnest = self._parse_unnest() 2474 if unnest: 2475 return unnest 2476 2477 values = self._parse_derived_table_values() 2478 if values: 2479 return values 2480 2481 subquery = self._parse_select(table=True) 2482 if subquery: 2483 if not subquery.args.get("pivots"): 2484 subquery.set("pivots", self._parse_pivots()) 2485 return subquery 2486 2487 bracket = parse_bracket and self._parse_bracket(None) 2488 bracket = self.expression(exp.Table, this=bracket) if bracket else None 2489 this: exp.Expression = bracket or self._parse_table_parts(schema=schema) 2490 2491 if schema: 2492 return self._parse_schema(this=this) 2493 2494 if self.ALIAS_POST_TABLESAMPLE: 2495 table_sample = self._parse_table_sample() 2496 2497 alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 2498 if alias: 2499 this.set("alias", alias) 2500 2501 if not this.args.get("pivots"): 2502 this.set("pivots", self._parse_pivots()) 2503 2504 this.set("hints", self._parse_table_hints()) 2505 2506 if not self.ALIAS_POST_TABLESAMPLE: 2507 table_sample = self._parse_table_sample() 2508 2509 if table_sample: 2510 table_sample.set("this", this) 2511 this = table_sample 2512 2513 if joins: 2514 for join in iter(self._parse_join, None): 2515 this.append("joins", join) 2516 2517 return this 2518 2519 def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]: 2520 if not self._match(TokenType.UNNEST): 2521 
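
_parse_table_parts (above) peels dot-separated qualifiers from left to right, so the last three parts become table, db, and catalog. For example, on toy SQL (property names as in recent sqlglot versions; illustrative):

    >>> import sqlglot
    >>> from sqlglot import exp
    >>> tbl = sqlglot.parse_one("SELECT * FROM c.d.t").find(exp.Table)
    >>> tbl.catalog, tbl.db, tbl.name
    ('c', 'd', 't')
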
return None 2522 2523 expressions = self._parse_wrapped_csv(self._parse_type) 2524 ordinality = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 2525 2526 alias = self._parse_table_alias() if with_alias else None 2527 2528 if alias and self.UNNEST_COLUMN_ONLY: 2529 if alias.args.get("columns"): 2530 self.raise_error("Unexpected extra column alias in unnest.") 2531 2532 alias.set("columns", [alias.this]) 2533 alias.set("this", None) 2534 2535 offset = None 2536 if self._match_pair(TokenType.WITH, TokenType.OFFSET): 2537 self._match(TokenType.ALIAS) 2538 offset = self._parse_id_var() or exp.to_identifier("offset") 2539 2540 return self.expression( 2541 exp.Unnest, expressions=expressions, ordinality=ordinality, alias=alias, offset=offset 2542 ) 2543 2544 def _parse_derived_table_values(self) -> t.Optional[exp.Values]: 2545 is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES) 2546 if not is_derived and not self._match(TokenType.VALUES): 2547 return None 2548 2549 expressions = self._parse_csv(self._parse_value) 2550 alias = self._parse_table_alias() 2551 2552 if is_derived: 2553 self._match_r_paren() 2554 2555 return self.expression( 2556 exp.Values, expressions=expressions, alias=alias or self._parse_table_alias() 2557 ) 2558 2559 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 2560 if not self._match(TokenType.TABLE_SAMPLE) and not ( 2561 as_modifier and self._match_text_seq("USING", "SAMPLE") 2562 ): 2563 return None 2564 2565 bucket_numerator = None 2566 bucket_denominator = None 2567 bucket_field = None 2568 percent = None 2569 rows = None 2570 size = None 2571 seed = None 2572 2573 kind = ( 2574 self._prev.text if self._prev.token_type == TokenType.TABLE_SAMPLE else "USING SAMPLE" 2575 ) 2576 method = self._parse_var(tokens=(TokenType.ROW,)) 2577 2578 self._match(TokenType.L_PAREN) 2579 2580 num = self._parse_number() 2581 2582 if self._match_text_seq("BUCKET"): 2583 bucket_numerator = self._parse_number() 2584 self._match_text_seq("OUT", "OF") 2585 bucket_denominator = self._parse_number() 2586 self._match(TokenType.ON) 2587 bucket_field = self._parse_field() 2588 elif self._match_set((TokenType.PERCENT, TokenType.MOD)): 2589 percent = num 2590 elif self._match(TokenType.ROWS): 2591 rows = num 2592 else: 2593 size = num 2594 2595 self._match(TokenType.R_PAREN) 2596 2597 if self._match(TokenType.L_PAREN): 2598 method = self._parse_var() 2599 seed = self._match(TokenType.COMMA) and self._parse_number() 2600 self._match_r_paren() 2601 elif self._match_texts(("SEED", "REPEATABLE")): 2602 seed = self._parse_wrapped(self._parse_number) 2603 2604 return self.expression( 2605 exp.TableSample, 2606 method=method, 2607 bucket_numerator=bucket_numerator, 2608 bucket_denominator=bucket_denominator, 2609 bucket_field=bucket_field, 2610 percent=percent, 2611 rows=rows, 2612 size=size, 2613 seed=seed, 2614 kind=kind, 2615 ) 2616 2617 def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]: 2618 return list(iter(self._parse_pivot, None)) or None 2619 2620 def _parse_joins(self) -> t.Optional[t.List[exp.Join]]: 2621 return list(iter(self._parse_join, None)) or None 2622 2623 # https://duckdb.org/docs/sql/statements/pivot 2624 def _parse_simplified_pivot(self) -> exp.Pivot: 2625 def _parse_on() -> t.Optional[exp.Expression]: 2626 this = self._parse_bitwise() 2627 return self._parse_in(this) if self._match(TokenType.IN) else this 2628 2629 this = self._parse_table() 2630 expressions = self._match(TokenType.ON) and
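
_parse_unnest (above) records WITH ORDINALITY and the table alias directly on the exp.Unnest node. Illustrative, using the Presto reader on toy SQL:

    >>> import sqlglot
    >>> from sqlglot import exp
    >>> u = sqlglot.parse_one(
    ...     "SELECT * FROM UNNEST(ARRAY[1, 2]) WITH ORDINALITY AS t(x, i)", read="presto"
    ... ).find(exp.Unnest)
    >>> bool(u.args.get("ordinality")), u.alias
    (True, 't')
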
self._parse_csv(_parse_on) 2631 using = self._match(TokenType.USING) and self._parse_csv( 2632 lambda: self._parse_alias(self._parse_function()) 2633 ) 2634 group = self._parse_group() 2635 return self.expression( 2636 exp.Pivot, this=this, expressions=expressions, using=using, group=group 2637 ) 2638 2639 def _parse_pivot(self) -> t.Optional[exp.Pivot]: 2640 index = self._index 2641 include_nulls = None 2642 2643 if self._match(TokenType.PIVOT): 2644 unpivot = False 2645 elif self._match(TokenType.UNPIVOT): 2646 unpivot = True 2647 2648 # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax 2649 if self._match_text_seq("INCLUDE", "NULLS"): 2650 include_nulls = True 2651 elif self._match_text_seq("EXCLUDE", "NULLS"): 2652 include_nulls = False 2653 else: 2654 return None 2655 2656 expressions = [] 2657 field = None 2658 2659 if not self._match(TokenType.L_PAREN): 2660 self._retreat(index) 2661 return None 2662 2663 if unpivot: 2664 expressions = self._parse_csv(self._parse_column) 2665 else: 2666 expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function())) 2667 2668 if not expressions: 2669 self.raise_error("Failed to parse PIVOT's aggregation list") 2670 2671 if not self._match(TokenType.FOR): 2672 self.raise_error("Expecting FOR") 2673 2674 value = self._parse_column() 2675 2676 if not self._match(TokenType.IN): 2677 self.raise_error("Expecting IN") 2678 2679 field = self._parse_in(value, alias=True) 2680 2681 self._match_r_paren() 2682 2683 pivot = self.expression( 2684 exp.Pivot, 2685 expressions=expressions, 2686 field=field, 2687 unpivot=unpivot, 2688 include_nulls=include_nulls, 2689 ) 2690 2691 if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False): 2692 pivot.set("alias", self._parse_table_alias()) 2693 2694 if not unpivot: 2695 names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions)) 2696 2697 columns: t.List[exp.Expression] = [] 2698 for fld in pivot.args["field"].expressions: 2699 field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name 2700 for name in names: 2701 if self.PREFIXED_PIVOT_COLUMNS: 2702 name = f"{name}_{field_name}" if name else field_name 2703 else: 2704 name = f"{field_name}_{name}" if name else field_name 2705 2706 columns.append(exp.to_identifier(name)) 2707 2708 pivot.set("columns", columns) 2709 2710 return pivot 2711 2712 def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]: 2713 return [agg.alias for agg in aggregations] 2714 2715 def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]: 2716 if not skip_where_token and not self._match(TokenType.WHERE): 2717 return None 2718 2719 return self.expression( 2720 exp.Where, comments=self._prev_comments, this=self._parse_conjunction() 2721 ) 2722 2723 def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]: 2724 if not skip_group_by_token and not self._match(TokenType.GROUP_BY): 2725 return None 2726 2727 elements = defaultdict(list) 2728 2729 if self._match(TokenType.ALL): 2730 return self.expression(exp.Group, all=True) 2731 2732 while True: 2733 expressions = self._parse_csv(self._parse_conjunction) 2734 if expressions: 2735 elements["expressions"].extend(expressions) 2736 2737 grouping_sets = self._parse_grouping_sets() 2738 if grouping_sets: 2739 elements["grouping_sets"].extend(grouping_sets) 2740 2741 rollup = None 2742 cube = None 2743 totals = None 2744 2745 with_ = self._match(TokenType.WITH) 2746 
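
In the ROLLUP/CUBE branches just below, a bare WITH ROLLUP stores True in place of a column list, while ROLLUP (...) stores the wrapped columns. A sketch with the MySQL reader (illustrative):

    >>> import sqlglot
    >>> tree = sqlglot.parse_one("SELECT a, SUM(b) FROM t GROUP BY a WITH ROLLUP", read="mysql")
    >>> tree.args["group"].args["rollup"]
    [True]
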
if self._match(TokenType.ROLLUP): 2747 rollup = with_ or self._parse_wrapped_csv(self._parse_column) 2748 elements["rollup"].extend(ensure_list(rollup)) 2749 2750 if self._match(TokenType.CUBE): 2751 cube = with_ or self._parse_wrapped_csv(self._parse_column) 2752 elements["cube"].extend(ensure_list(cube)) 2753 2754 if self._match_text_seq("TOTALS"): 2755 totals = True 2756 elements["totals"] = True # type: ignore 2757 2758 if not (grouping_sets or rollup or cube or totals): 2759 break 2760 2761 return self.expression(exp.Group, **elements) # type: ignore 2762 2763 def _parse_grouping_sets(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]: 2764 if not self._match(TokenType.GROUPING_SETS): 2765 return None 2766 2767 return self._parse_wrapped_csv(self._parse_grouping_set) 2768 2769 def _parse_grouping_set(self) -> t.Optional[exp.Expression]: 2770 if self._match(TokenType.L_PAREN): 2771 grouping_set = self._parse_csv(self._parse_column) 2772 self._match_r_paren() 2773 return self.expression(exp.Tuple, expressions=grouping_set) 2774 2775 return self._parse_column() 2776 2777 def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]: 2778 if not skip_having_token and not self._match(TokenType.HAVING): 2779 return None 2780 return self.expression(exp.Having, this=self._parse_conjunction()) 2781 2782 def _parse_qualify(self) -> t.Optional[exp.Qualify]: 2783 if not self._match(TokenType.QUALIFY): 2784 return None 2785 return self.expression(exp.Qualify, this=self._parse_conjunction()) 2786 2787 def _parse_order( 2788 self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False 2789 ) -> t.Optional[exp.Expression]: 2790 if not skip_order_token and not self._match(TokenType.ORDER_BY): 2791 return this 2792 2793 return self.expression( 2794 exp.Order, this=this, expressions=self._parse_csv(self._parse_ordered) 2795 ) 2796 2797 def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]: 2798 if not self._match(token): 2799 return None 2800 return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered)) 2801 2802 def _parse_ordered(self) -> exp.Ordered: 2803 this = self._parse_conjunction() 2804 self._match(TokenType.ASC) 2805 2806 is_desc = self._match(TokenType.DESC) 2807 is_nulls_first = self._match_text_seq("NULLS", "FIRST") 2808 is_nulls_last = self._match_text_seq("NULLS", "LAST") 2809 desc = is_desc or False 2810 asc = not desc 2811 nulls_first = is_nulls_first or False 2812 explicitly_null_ordered = is_nulls_first or is_nulls_last 2813 2814 if ( 2815 not explicitly_null_ordered 2816 and ( 2817 (asc and self.NULL_ORDERING == "nulls_are_small") 2818 or (desc and self.NULL_ORDERING != "nulls_are_small") 2819 ) 2820 and self.NULL_ORDERING != "nulls_are_last" 2821 ): 2822 nulls_first = True 2823 2824 return self.expression(exp.Ordered, this=this, desc=desc, nulls_first=nulls_first) 2825 2826 def _parse_limit( 2827 self, this: t.Optional[exp.Expression] = None, top: bool = False 2828 ) -> t.Optional[exp.Expression]: 2829 if self._match(TokenType.TOP if top else TokenType.LIMIT): 2830 comments = self._prev_comments 2831 if top: 2832 limit_paren = self._match(TokenType.L_PAREN) 2833 expression = self._parse_number() 2834 2835 if limit_paren: 2836 self._match_r_paren() 2837 else: 2838 expression = self._parse_term() 2839 2840 if self._match(TokenType.COMMA): 2841 offset = expression 2842 expression = self._parse_term() 2843 else: 2844 offset = None 2845 2846 limit_exp = self.expression( 2847 exp.Limit, this=this, 
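
_parse_limit (above) also accepts MySQL's LIMIT offset, count form; _parse_query_modifiers later pops the offset out of the Limit node into its own exp.Offset. Illustrative, on toy SQL:

    >>> import sqlglot
    >>> tree = sqlglot.parse_one("SELECT * FROM t LIMIT 5, 10", read="mysql")
    >>> tree.args["limit"].expression.sql(), tree.args["offset"].expression.sql()
    ('10', '5')
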
expression=expression, offset=offset, comments=comments 2848 ) 2849 2850 return limit_exp 2851 2852 if self._match(TokenType.FETCH): 2853 direction = self._match_set((TokenType.FIRST, TokenType.NEXT)) 2854 direction = self._prev.text if direction else "FIRST" 2855 2856 count = self._parse_number() 2857 percent = self._match(TokenType.PERCENT) 2858 2859 self._match_set((TokenType.ROW, TokenType.ROWS)) 2860 2861 only = self._match_text_seq("ONLY") 2862 with_ties = self._match_text_seq("WITH", "TIES") 2863 2864 if only and with_ties: 2865 self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause") 2866 2867 return self.expression( 2868 exp.Fetch, 2869 direction=direction, 2870 count=count, 2871 percent=percent, 2872 with_ties=with_ties, 2873 ) 2874 2875 return this 2876 2877 def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 2878 if not self._match(TokenType.OFFSET): 2879 return this 2880 2881 count = self._parse_term() 2882 self._match_set((TokenType.ROW, TokenType.ROWS)) 2883 return self.expression(exp.Offset, this=this, expression=count) 2884 2885 def _parse_locks(self) -> t.List[exp.Lock]: 2886 locks = [] 2887 while True: 2888 if self._match_text_seq("FOR", "UPDATE"): 2889 update = True 2890 elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq( 2891 "LOCK", "IN", "SHARE", "MODE" 2892 ): 2893 update = False 2894 else: 2895 break 2896 2897 expressions = None 2898 if self._match_text_seq("OF"): 2899 expressions = self._parse_csv(lambda: self._parse_table(schema=True)) 2900 2901 wait: t.Optional[bool | exp.Expression] = None 2902 if self._match_text_seq("NOWAIT"): 2903 wait = True 2904 elif self._match_text_seq("WAIT"): 2905 wait = self._parse_primary() 2906 elif self._match_text_seq("SKIP", "LOCKED"): 2907 wait = False 2908 2909 locks.append( 2910 self.expression(exp.Lock, update=update, expressions=expressions, wait=wait) 2911 ) 2912 2913 return locks 2914 2915 def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 2916 if not self._match_set(self.SET_OPERATIONS): 2917 return this 2918 2919 token_type = self._prev.token_type 2920 2921 if token_type == TokenType.UNION: 2922 expression = exp.Union 2923 elif token_type == TokenType.EXCEPT: 2924 expression = exp.Except 2925 else: 2926 expression = exp.Intersect 2927 2928 return self.expression( 2929 expression, 2930 this=this, 2931 distinct=self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL), 2932 expression=self._parse_set_operations(self._parse_select(nested=True)), 2933 ) 2934 2935 def _parse_expression(self) -> t.Optional[exp.Expression]: 2936 return self._parse_alias(self._parse_conjunction()) 2937 2938 def _parse_conjunction(self) -> t.Optional[exp.Expression]: 2939 return self._parse_tokens(self._parse_equality, self.CONJUNCTION) 2940 2941 def _parse_equality(self) -> t.Optional[exp.Expression]: 2942 return self._parse_tokens(self._parse_comparison, self.EQUALITY) 2943 2944 def _parse_comparison(self) -> t.Optional[exp.Expression]: 2945 return self._parse_tokens(self._parse_range, self.COMPARISON) 2946 2947 def _parse_range(self) -> t.Optional[exp.Expression]: 2948 this = self._parse_bitwise() 2949 negate = self._match(TokenType.NOT) 2950 2951 if self._match_set(self.RANGE_PARSERS): 2952 expression = self.RANGE_PARSERS[self._prev.token_type](self, this) 2953 if not expression: 2954 return this 2955 2956 this = expression 2957 elif self._match(TokenType.ISNULL): 2958 this = self.expression(exp.Is, this=this, 
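
_parse_set_operations (below) marks a set operation as distinct unless ALL is present, matching the SQL default that UNION means UNION DISTINCT. Sketch:

    >>> import sqlglot
    >>> from sqlglot import exp
    >>> union = sqlglot.parse_one("SELECT 1 UNION SELECT 2")
    >>> isinstance(union, exp.Union), union.args["distinct"]
    (True, True)
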
expression=exp.Null()) 2959 2960 # Postgres supports ISNULL and NOTNULL for conditions. 2961 # https://blog.andreiavram.ro/postgresql-null-composite-type/ 2962 if self._match(TokenType.NOTNULL): 2963 this = self.expression(exp.Is, this=this, expression=exp.Null()) 2964 this = self.expression(exp.Not, this=this) 2965 2966 if negate: 2967 this = self.expression(exp.Not, this=this) 2968 2969 if self._match(TokenType.IS): 2970 this = self._parse_is(this) 2971 2972 return this 2973 2974 def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 2975 index = self._index - 1 2976 negate = self._match(TokenType.NOT) 2977 2978 if self._match_text_seq("DISTINCT", "FROM"): 2979 klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ 2980 return self.expression(klass, this=this, expression=self._parse_expression()) 2981 2982 expression = self._parse_null() or self._parse_boolean() 2983 if not expression: 2984 self._retreat(index) 2985 return None 2986 2987 this = self.expression(exp.Is, this=this, expression=expression) 2988 return self.expression(exp.Not, this=this) if negate else this 2989 2990 def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In: 2991 unnest = self._parse_unnest(with_alias=False) 2992 if unnest: 2993 this = self.expression(exp.In, this=this, unnest=unnest) 2994 elif self._match(TokenType.L_PAREN): 2995 expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias)) 2996 2997 if len(expressions) == 1 and isinstance(expressions[0], exp.Subqueryable): 2998 this = self.expression(exp.In, this=this, query=expressions[0]) 2999 else: 3000 this = self.expression(exp.In, this=this, expressions=expressions) 3001 3002 self._match_r_paren(this) 3003 else: 3004 this = self.expression(exp.In, this=this, field=self._parse_field()) 3005 3006 return this 3007 3008 def _parse_between(self, this: exp.Expression) -> exp.Between: 3009 low = self._parse_bitwise() 3010 self._match(TokenType.AND) 3011 high = self._parse_bitwise() 3012 return self.expression(exp.Between, this=this, low=low, high=high) 3013 3014 def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3015 if not self._match(TokenType.ESCAPE): 3016 return this 3017 return self.expression(exp.Escape, this=this, expression=self._parse_string()) 3018 3019 def _parse_interval(self) -> t.Optional[exp.Interval]: 3020 if not self._match(TokenType.INTERVAL): 3021 return None 3022 3023 if self._match(TokenType.STRING, advance=False): 3024 this = self._parse_primary() 3025 else: 3026 this = self._parse_term() 3027 3028 unit = self._parse_function() or self._parse_var() 3029 3030 # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse 3031 # each INTERVAL expression into this canonical form so it's easy to transpile 3032 if this and this.is_number: 3033 this = exp.Literal.string(this.name) 3034 elif this and this.is_string: 3035 parts = this.name.split() 3036 3037 if len(parts) == 2: 3038 if unit: 3039 # this is not actually a unit, it's something else 3040 unit = None 3041 self._retreat(self._index - 1) 3042 else: 3043 this = exp.Literal.string(parts[0]) 3044 unit = self.expression(exp.Var, this=parts[1]) 3045 3046 return self.expression(exp.Interval, this=this, unit=unit) 3047 3048 def _parse_bitwise(self) -> t.Optional[exp.Expression]: 3049 this = self._parse_term() 3050 3051 while True: 3052 if self._match_set(self.BITWISE): 3053 this = self.expression( 3054 self.BITWISE[self._prev.token_type], 3055 this=this, 3056 
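
_parse_interval (below) canonicalizes forms like INTERVAL '5 day' into a quoted quantity plus a separate unit, which keeps transpilation uniform. Roughly (generated SQL may vary by version):

    >>> import sqlglot
    >>> sqlglot.parse_one("SELECT INTERVAL '5 day'").sql()  # roughly: "SELECT INTERVAL '5' day"
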
expression=self._parse_term(), 3057 ) 3058 elif self._match(TokenType.DQMARK): 3059 this = self.expression(exp.Coalesce, this=this, expressions=self._parse_term()) 3060 elif self._match_pair(TokenType.LT, TokenType.LT): 3061 this = self.expression( 3062 exp.BitwiseLeftShift, this=this, expression=self._parse_term() 3063 ) 3064 elif self._match_pair(TokenType.GT, TokenType.GT): 3065 this = self.expression( 3066 exp.BitwiseRightShift, this=this, expression=self._parse_term() 3067 ) 3068 else: 3069 break 3070 3071 return this 3072 3073 def _parse_term(self) -> t.Optional[exp.Expression]: 3074 return self._parse_tokens(self._parse_factor, self.TERM) 3075 3076 def _parse_factor(self) -> t.Optional[exp.Expression]: 3077 return self._parse_tokens(self._parse_unary, self.FACTOR) 3078 3079 def _parse_unary(self) -> t.Optional[exp.Expression]: 3080 if self._match_set(self.UNARY_PARSERS): 3081 return self.UNARY_PARSERS[self._prev.token_type](self) 3082 return self._parse_at_time_zone(self._parse_type()) 3083 3084 def _parse_type(self) -> t.Optional[exp.Expression]: 3085 interval = self._parse_interval() 3086 if interval: 3087 return interval 3088 3089 index = self._index 3090 data_type = self._parse_types(check_func=True) 3091 this = self._parse_column() 3092 3093 if data_type: 3094 if isinstance(this, exp.Literal): 3095 parser = self.TYPE_LITERAL_PARSERS.get(data_type.this) 3096 if parser: 3097 return parser(self, this, data_type) 3098 return self.expression(exp.Cast, this=this, to=data_type) 3099 if not data_type.expressions: 3100 self._retreat(index) 3101 return self._parse_column() 3102 return self._parse_column_ops(data_type) 3103 3104 return this 3105 3106 def _parse_type_size(self) -> t.Optional[exp.DataTypeSize]: 3107 this = self._parse_type() 3108 if not this: 3109 return None 3110 3111 return self.expression( 3112 exp.DataTypeSize, this=this, expression=self._parse_var(any_token=True) 3113 ) 3114 3115 def _parse_types( 3116 self, check_func: bool = False, schema: bool = False 3117 ) -> t.Optional[exp.Expression]: 3118 index = self._index 3119 3120 prefix = self._match_text_seq("SYSUDTLIB", ".") 3121 3122 if not self._match_set(self.TYPE_TOKENS): 3123 return None 3124 3125 type_token = self._prev.token_type 3126 3127 if type_token == TokenType.PSEUDO_TYPE: 3128 return self.expression(exp.PseudoType, this=self._prev.text) 3129 3130 nested = type_token in self.NESTED_TYPE_TOKENS 3131 is_struct = type_token in self.STRUCT_TYPE_TOKENS 3132 expressions = None 3133 maybe_func = False 3134 3135 if self._match(TokenType.L_PAREN): 3136 if is_struct: 3137 expressions = self._parse_csv(self._parse_struct_types) 3138 elif nested: 3139 expressions = self._parse_csv( 3140 lambda: self._parse_types(check_func=check_func, schema=schema) 3141 ) 3142 elif type_token in self.ENUM_TYPE_TOKENS: 3143 expressions = self._parse_csv(self._parse_equality) 3144 else: 3145 expressions = self._parse_csv(self._parse_type_size) 3146 3147 if not expressions or not self._match(TokenType.R_PAREN): 3148 self._retreat(index) 3149 return None 3150 3151 maybe_func = True 3152 3153 this: t.Optional[exp.Expression] = None 3154 values: t.Optional[t.List[t.Optional[exp.Expression]]] = None 3155 3156 if nested and self._match(TokenType.LT): 3157 if is_struct: 3158 expressions = self._parse_csv(self._parse_struct_types) 3159 else: 3160 expressions = self._parse_csv( 3161 lambda: self._parse_types(check_func=check_func, schema=schema) 3162 ) 3163 3164 if not self._match(TokenType.GT): 3165 self.raise_error("Expecting >") 3166 3167 if 
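
_parse_types (below) handles nested type parameters such as ARRAY<INT>; the public helper exp.DataType.build goes through roughly the same machinery. A sketch:

    >>> from sqlglot import exp
    >>> dt = exp.DataType.build("ARRAY<INT>")
    >>> dt.this == exp.DataType.Type.ARRAY, bool(dt.args.get("nested"))
    (True, True)
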
self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)): 3168 values = self._parse_csv(self._parse_conjunction) 3169 self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN)) 3170 3171 if type_token in self.TIMESTAMPS: 3172 if self._match_text_seq("WITH", "TIME", "ZONE"): 3173 maybe_func = False 3174 tz_type = ( 3175 exp.DataType.Type.TIMETZ 3176 if type_token in self.TIMES 3177 else exp.DataType.Type.TIMESTAMPTZ 3178 ) 3179 this = exp.DataType(this=tz_type, expressions=expressions) 3180 elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"): 3181 maybe_func = False 3182 this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions) 3183 elif self._match_text_seq("WITHOUT", "TIME", "ZONE"): 3184 maybe_func = False 3185 elif type_token == TokenType.INTERVAL: 3186 if self._match_text_seq("YEAR", "TO", "MONTH"): 3187 span: t.Optional[t.List[exp.Expression]] = [exp.IntervalYearToMonthSpan()] 3188 elif self._match_text_seq("DAY", "TO", "SECOND"): 3189 span = [exp.IntervalDayToSecondSpan()] 3190 else: 3191 span = None 3192 3193 unit = not span and self._parse_var() 3194 if not unit: 3195 this = self.expression( 3196 exp.DataType, this=exp.DataType.Type.INTERVAL, expressions=span 3197 ) 3198 else: 3199 this = self.expression(exp.Interval, unit=unit) 3200 3201 if maybe_func and check_func: 3202 index2 = self._index 3203 peek = self._parse_string() 3204 3205 if not peek: 3206 self._retreat(index) 3207 return None 3208 3209 self._retreat(index2) 3210 3211 if not this: 3212 this = exp.DataType( 3213 this=exp.DataType.Type[type_token.value], 3214 expressions=expressions, 3215 nested=nested, 3216 values=values, 3217 prefix=prefix, 3218 ) 3219 3220 while self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET): 3221 this = exp.DataType(this=exp.DataType.Type.ARRAY, expressions=[this], nested=True) 3222 3223 return this 3224 3225 def _parse_struct_types(self) -> t.Optional[exp.Expression]: 3226 this = self._parse_type() or self._parse_id_var() 3227 self._match(TokenType.COLON) 3228 return self._parse_column_def(this) 3229 3230 def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3231 if not self._match_text_seq("AT", "TIME", "ZONE"): 3232 return this 3233 return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary()) 3234 3235 def _parse_column(self) -> t.Optional[exp.Expression]: 3236 this = self._parse_field() 3237 if isinstance(this, exp.Identifier): 3238 this = self.expression(exp.Column, this=this) 3239 elif not this: 3240 return self._parse_bracket(this) 3241 return self._parse_column_ops(this) 3242 3243 def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3244 this = self._parse_bracket(this) 3245 3246 while self._match_set(self.COLUMN_OPERATORS): 3247 op_token = self._prev.token_type 3248 op = self.COLUMN_OPERATORS.get(op_token) 3249 3250 if op_token == TokenType.DCOLON: 3251 field = self._parse_types() 3252 if not field: 3253 self.raise_error("Expected type") 3254 elif op and self._curr: 3255 self._advance() 3256 value = self._prev.text 3257 field = ( 3258 exp.Literal.number(value) 3259 if self._prev.token_type == TokenType.NUMBER 3260 else exp.Literal.string(value) 3261 ) 3262 else: 3263 field = self._parse_field(anonymous_func=True, any_token=True) 3264 3265 if isinstance(field, exp.Func): 3266 # bigquery allows function calls like x.y.count(...) 3267 # SAFE.SUBSTR(...) 
3268 # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules 3269 this = self._replace_columns_with_dots(this) 3270 3271 if op: 3272 this = op(self, this, field) 3273 elif isinstance(this, exp.Column) and not this.args.get("catalog"): 3274 this = self.expression( 3275 exp.Column, 3276 this=field, 3277 table=this.this, 3278 db=this.args.get("table"), 3279 catalog=this.args.get("db"), 3280 ) 3281 else: 3282 this = self.expression(exp.Dot, this=this, expression=field) 3283 this = self._parse_bracket(this) 3284 return this 3285 3286 def _parse_primary(self) -> t.Optional[exp.Expression]: 3287 if self._match_set(self.PRIMARY_PARSERS): 3288 token_type = self._prev.token_type 3289 primary = self.PRIMARY_PARSERS[token_type](self, self._prev) 3290 3291 if token_type == TokenType.STRING: 3292 expressions = [primary] 3293 while self._match(TokenType.STRING): 3294 expressions.append(exp.Literal.string(self._prev.text)) 3295 3296 if len(expressions) > 1: 3297 return self.expression(exp.Concat, expressions=expressions) 3298 3299 return primary 3300 3301 if self._match_pair(TokenType.DOT, TokenType.NUMBER): 3302 return exp.Literal.number(f"0.{self._prev.text}") 3303 3304 if self._match(TokenType.L_PAREN): 3305 comments = self._prev_comments 3306 query = self._parse_select() 3307 3308 if query: 3309 expressions = [query] 3310 else: 3311 expressions = self._parse_expressions() 3312 3313 this = self._parse_query_modifiers(seq_get(expressions, 0)) 3314 3315 if isinstance(this, exp.Subqueryable): 3316 this = self._parse_set_operations( 3317 self._parse_subquery(this=this, parse_alias=False) 3318 ) 3319 elif len(expressions) > 1: 3320 this = self.expression(exp.Tuple, expressions=expressions) 3321 else: 3322 this = self.expression(exp.Paren, this=self._parse_set_operations(this)) 3323 3324 if this: 3325 this.add_comments(comments) 3326 3327 self._match_r_paren(expression=this) 3328 return this 3329 3330 return None 3331 3332 def _parse_field( 3333 self, 3334 any_token: bool = False, 3335 tokens: t.Optional[t.Collection[TokenType]] = None, 3336 anonymous_func: bool = False, 3337 ) -> t.Optional[exp.Expression]: 3338 return ( 3339 self._parse_primary() 3340 or self._parse_function(anonymous=anonymous_func) 3341 or self._parse_id_var(any_token=any_token, tokens=tokens) 3342 ) 3343 3344 def _parse_function( 3345 self, 3346 functions: t.Optional[t.Dict[str, t.Callable]] = None, 3347 anonymous: bool = False, 3348 optional_parens: bool = True, 3349 ) -> t.Optional[exp.Expression]: 3350 if not self._curr: 3351 return None 3352 3353 token_type = self._curr.token_type 3354 this = self._curr.text 3355 upper = this.upper() 3356 3357 parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper) 3358 if optional_parens and parser: 3359 self._advance() 3360 return parser(self) 3361 3362 if not self._next or self._next.token_type != TokenType.L_PAREN: 3363 if optional_parens and token_type in self.NO_PAREN_FUNCTIONS: 3364 self._advance() 3365 return self.expression(self.NO_PAREN_FUNCTIONS[token_type]) 3366 3367 return None 3368 3369 if token_type not in self.FUNC_TOKENS: 3370 return None 3371 3372 self._advance(2) 3373 3374 parser = self.FUNCTION_PARSERS.get(upper) 3375 if parser and not anonymous: 3376 this = parser(self) 3377 else: 3378 subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type) 3379 3380 if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH): 3381 this = self.expression(subquery_predicate, this=self._parse_select()) 3382 
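
_parse_primary (below) folds adjacent string literals into a single exp.Concat, mirroring SQL's implicit literal concatenation. Sketch on toy SQL:

    >>> import sqlglot
    >>> from sqlglot import exp
    >>> sqlglot.parse_one("SELECT 'foo' 'bar'").find(exp.Concat) is not None
    True
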
self._match_r_paren() 3383 return this 3384 3385 if functions is None: 3386 functions = self.FUNCTIONS 3387 3388 function = functions.get(upper) 3389 3390 alias = upper in self.FUNCTIONS_WITH_ALIASED_ARGS 3391 args = self._parse_csv(lambda: self._parse_lambda(alias=alias)) 3392 3393 if function and not anonymous: 3394 func = self.validate_expression(function(args), args) 3395 if not self.NORMALIZE_FUNCTIONS: 3396 func.meta["name"] = this 3397 this = func 3398 else: 3399 this = self.expression(exp.Anonymous, this=this, expressions=args) 3400 3401 self._match_r_paren(this) 3402 return self._parse_window(this) 3403 3404 def _parse_function_parameter(self) -> t.Optional[exp.Expression]: 3405 return self._parse_column_def(self._parse_id_var()) 3406 3407 def _parse_user_defined_function( 3408 self, kind: t.Optional[TokenType] = None 3409 ) -> t.Optional[exp.Expression]: 3410 this = self._parse_id_var() 3411 3412 while self._match(TokenType.DOT): 3413 this = self.expression(exp.Dot, this=this, expression=self._parse_id_var()) 3414 3415 if not self._match(TokenType.L_PAREN): 3416 return this 3417 3418 expressions = self._parse_csv(self._parse_function_parameter) 3419 self._match_r_paren() 3420 return self.expression( 3421 exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True 3422 ) 3423 3424 def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier: 3425 literal = self._parse_primary() 3426 if literal: 3427 return self.expression(exp.Introducer, this=token.text, expression=literal) 3428 3429 return self.expression(exp.Identifier, this=token.text) 3430 3431 def _parse_session_parameter(self) -> exp.SessionParameter: 3432 kind = None 3433 this = self._parse_id_var() or self._parse_primary() 3434 3435 if this and self._match(TokenType.DOT): 3436 kind = this.name 3437 this = self._parse_var() or self._parse_primary() 3438 3439 return self.expression(exp.SessionParameter, this=this, kind=kind) 3440 3441 def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]: 3442 index = self._index 3443 3444 if self._match(TokenType.L_PAREN): 3445 expressions = self._parse_csv(self._parse_id_var) 3446 3447 if not self._match(TokenType.R_PAREN): 3448 self._retreat(index) 3449 else: 3450 expressions = [self._parse_id_var()] 3451 3452 if self._match_set(self.LAMBDAS): 3453 return self.LAMBDAS[self._prev.token_type](self, expressions) 3454 3455 self._retreat(index) 3456 3457 this: t.Optional[exp.Expression] 3458 3459 if self._match(TokenType.DISTINCT): 3460 this = self.expression( 3461 exp.Distinct, expressions=self._parse_csv(self._parse_conjunction) 3462 ) 3463 else: 3464 this = self._parse_select_or_expression(alias=alias) 3465 3466 return self._parse_limit(self._parse_order(self._parse_respect_or_ignore_nulls(this))) 3467 3468 def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 3469 index = self._index 3470 3471 if not self.errors: 3472 try: 3473 if self._parse_select(nested=True): 3474 return this 3475 except ParseError: 3476 pass 3477 finally: 3478 self.errors.clear() 3479 self._retreat(index) 3480 3481 if not self._match(TokenType.L_PAREN): 3482 return this 3483 3484 args = self._parse_csv( 3485 lambda: self._parse_constraint() 3486 or self._parse_column_def(self._parse_field(any_token=True)) 3487 ) 3488 3489 self._match_r_paren() 3490 return self.expression(exp.Schema, this=this, expressions=args) 3491 3492 def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3493 # column 
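
_parse_function (above) falls back to exp.Anonymous for names it does not recognize, preserving the raw function name and its arguments. Illustrative, with a made-up function name:

    >>> import sqlglot
    >>> from sqlglot import exp
    >>> func = sqlglot.parse_one("SELECT MY_UDF(a, 1)").find(exp.Anonymous)
    >>> func.this, len(func.expressions)
    ('MY_UDF', 2)
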
defs are not really columns, they're identifiers 3494 if isinstance(this, exp.Column): 3495 this = this.this 3496 3497 kind = self._parse_types(schema=True) 3498 3499 if self._match_text_seq("FOR", "ORDINALITY"): 3500 return self.expression(exp.ColumnDef, this=this, ordinality=True) 3501 3502 constraints = [] 3503 while True: 3504 constraint = self._parse_column_constraint() 3505 if not constraint: 3506 break 3507 constraints.append(constraint) 3508 3509 if not kind and not constraints: 3510 return this 3511 3512 return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints) 3513 3514 def _parse_auto_increment( 3515 self, 3516 ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint: 3517 start = None 3518 increment = None 3519 3520 if self._match(TokenType.L_PAREN, advance=False): 3521 args = self._parse_wrapped_csv(self._parse_bitwise) 3522 start = seq_get(args, 0) 3523 increment = seq_get(args, 1) 3524 elif self._match_text_seq("START"): 3525 start = self._parse_bitwise() 3526 self._match_text_seq("INCREMENT") 3527 increment = self._parse_bitwise() 3528 3529 if start and increment: 3530 return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment) 3531 3532 return exp.AutoIncrementColumnConstraint() 3533 3534 def _parse_compress(self) -> exp.CompressColumnConstraint: 3535 if self._match(TokenType.L_PAREN, advance=False): 3536 return self.expression( 3537 exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise) 3538 ) 3539 3540 return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise()) 3541 3542 def _parse_generated_as_identity(self) -> exp.GeneratedAsIdentityColumnConstraint: 3543 if self._match_text_seq("BY", "DEFAULT"): 3544 on_null = self._match_pair(TokenType.ON, TokenType.NULL) 3545 this = self.expression( 3546 exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null 3547 ) 3548 else: 3549 self._match_text_seq("ALWAYS") 3550 this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True) 3551 3552 self._match(TokenType.ALIAS) 3553 identity = self._match_text_seq("IDENTITY") 3554 3555 if self._match(TokenType.L_PAREN): 3556 if self._match_text_seq("START", "WITH"): 3557 this.set("start", self._parse_bitwise()) 3558 if self._match_text_seq("INCREMENT", "BY"): 3559 this.set("increment", self._parse_bitwise()) 3560 if self._match_text_seq("MINVALUE"): 3561 this.set("minvalue", self._parse_bitwise()) 3562 if self._match_text_seq("MAXVALUE"): 3563 this.set("maxvalue", self._parse_bitwise()) 3564 3565 if self._match_text_seq("CYCLE"): 3566 this.set("cycle", True) 3567 elif self._match_text_seq("NO", "CYCLE"): 3568 this.set("cycle", False) 3569 3570 if not identity: 3571 this.set("expression", self._parse_bitwise()) 3572 3573 self._match_r_paren() 3574 3575 return this 3576 3577 def _parse_inline(self) -> exp.InlineLengthColumnConstraint: 3578 self._match_text_seq("LENGTH") 3579 return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise()) 3580 3581 def _parse_not_constraint( 3582 self, 3583 ) -> t.Optional[exp.NotNullColumnConstraint | exp.CaseSpecificColumnConstraint]: 3584 if self._match_text_seq("NULL"): 3585 return self.expression(exp.NotNullColumnConstraint) 3586 if self._match_text_seq("CASESPECIFIC"): 3587 return self.expression(exp.CaseSpecificColumnConstraint, not_=True) 3588 return None 3589 3590 def _parse_column_constraint(self) -> t.Optional[exp.Expression]: 3591 if self._match(TokenType.CONSTRAINT): 3592 this = 
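
_parse_column_def and _parse_column_constraint (above and below) wrap each recognized constraint in an exp.ColumnConstraint carrying a typed kind. A sketch on toy DDL:

    >>> import sqlglot
    >>> from sqlglot import exp
    >>> col = sqlglot.parse_one("CREATE TABLE t (id INT NOT NULL)").find(exp.ColumnDef)
    >>> [c.args["kind"].__class__.__name__ for c in col.args["constraints"]]
    ['NotNullColumnConstraint']
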
self._parse_id_var() 3593 else: 3594 this = None 3595 3596 if self._match_texts(self.CONSTRAINT_PARSERS): 3597 return self.expression( 3598 exp.ColumnConstraint, 3599 this=this, 3600 kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self), 3601 ) 3602 3603 return this 3604 3605 def _parse_constraint(self) -> t.Optional[exp.Expression]: 3606 if not self._match(TokenType.CONSTRAINT): 3607 return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS) 3608 3609 this = self._parse_id_var() 3610 expressions = [] 3611 3612 while True: 3613 constraint = self._parse_unnamed_constraint() or self._parse_function() 3614 if not constraint: 3615 break 3616 expressions.append(constraint) 3617 3618 return self.expression(exp.Constraint, this=this, expressions=expressions) 3619 3620 def _parse_unnamed_constraint( 3621 self, constraints: t.Optional[t.Collection[str]] = None 3622 ) -> t.Optional[exp.Expression]: 3623 if not self._match_texts(constraints or self.CONSTRAINT_PARSERS): 3624 return None 3625 3626 constraint = self._prev.text.upper() 3627 if constraint not in self.CONSTRAINT_PARSERS: 3628 self.raise_error(f"No parser found for schema constraint {constraint}.") 3629 3630 return self.CONSTRAINT_PARSERS[constraint](self) 3631 3632 def _parse_unique(self) -> exp.UniqueColumnConstraint: 3633 self._match_text_seq("KEY") 3634 return self.expression( 3635 exp.UniqueColumnConstraint, this=self._parse_schema(self._parse_id_var(any_token=False)) 3636 ) 3637 3638 def _parse_key_constraint_options(self) -> t.List[str]: 3639 options = [] 3640 while True: 3641 if not self._curr: 3642 break 3643 3644 if self._match(TokenType.ON): 3645 action = None 3646 on = self._advance_any() and self._prev.text 3647 3648 if self._match_text_seq("NO", "ACTION"): 3649 action = "NO ACTION" 3650 elif self._match_text_seq("CASCADE"): 3651 action = "CASCADE" 3652 elif self._match_pair(TokenType.SET, TokenType.NULL): 3653 action = "SET NULL" 3654 elif self._match_pair(TokenType.SET, TokenType.DEFAULT): 3655 action = "SET DEFAULT" 3656 else: 3657 self.raise_error("Invalid key constraint") 3658 3659 options.append(f"ON {on} {action}") 3660 elif self._match_text_seq("NOT", "ENFORCED"): 3661 options.append("NOT ENFORCED") 3662 elif self._match_text_seq("DEFERRABLE"): 3663 options.append("DEFERRABLE") 3664 elif self._match_text_seq("INITIALLY", "DEFERRED"): 3665 options.append("INITIALLY DEFERRED") 3666 elif self._match_text_seq("NORELY"): 3667 options.append("NORELY") 3668 elif self._match_text_seq("MATCH", "FULL"): 3669 options.append("MATCH FULL") 3670 else: 3671 break 3672 3673 return options 3674 3675 def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]: 3676 if match and not self._match(TokenType.REFERENCES): 3677 return None 3678 3679 expressions = None 3680 this = self._parse_table(schema=True) 3681 options = self._parse_key_constraint_options() 3682 return self.expression(exp.Reference, this=this, expressions=expressions, options=options) 3683 3684 def _parse_foreign_key(self) -> exp.ForeignKey: 3685 expressions = self._parse_wrapped_id_vars() 3686 reference = self._parse_references() 3687 options = {} 3688 3689 while self._match(TokenType.ON): 3690 if not self._match_set((TokenType.DELETE, TokenType.UPDATE)): 3691 self.raise_error("Expected DELETE or UPDATE") 3692 3693 kind = self._prev.text.lower() 3694 3695 if self._match_text_seq("NO", "ACTION"): 3696 action = "NO ACTION" 3697 elif self._match(TokenType.SET): 3698 self._match_set((TokenType.NULL, TokenType.DEFAULT)) 3699 action 
= "SET " + self._prev.text.upper() 3700 else: 3701 self._advance() 3702 action = self._prev.text.upper() 3703 3704 options[kind] = action 3705 3706 return self.expression( 3707 exp.ForeignKey, expressions=expressions, reference=reference, **options # type: ignore 3708 ) 3709 3710 def _parse_primary_key( 3711 self, wrapped_optional: bool = False, in_props: bool = False 3712 ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey: 3713 desc = ( 3714 self._match_set((TokenType.ASC, TokenType.DESC)) 3715 and self._prev.token_type == TokenType.DESC 3716 ) 3717 3718 if not in_props and not self._match(TokenType.L_PAREN, advance=False): 3719 return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc) 3720 3721 expressions = self._parse_wrapped_csv(self._parse_field, optional=wrapped_optional) 3722 options = self._parse_key_constraint_options() 3723 return self.expression(exp.PrimaryKey, expressions=expressions, options=options) 3724 3725 def _parse_bracket(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3726 if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)): 3727 return this 3728 3729 bracket_kind = self._prev.token_type 3730 3731 if self._match(TokenType.COLON): 3732 expressions: t.List[t.Optional[exp.Expression]] = [ 3733 self.expression(exp.Slice, expression=self._parse_conjunction()) 3734 ] 3735 else: 3736 expressions = self._parse_csv( 3737 lambda: self._parse_slice( 3738 self._parse_alias(self._parse_conjunction(), explicit=True) 3739 ) 3740 ) 3741 3742 # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs 3743 if bracket_kind == TokenType.L_BRACE: 3744 this = self.expression(exp.Struct, expressions=expressions) 3745 elif not this or this.name.upper() == "ARRAY": 3746 this = self.expression(exp.Array, expressions=expressions) 3747 else: 3748 expressions = apply_index_offset(this, expressions, -self.INDEX_OFFSET) 3749 this = self.expression(exp.Bracket, this=this, expressions=expressions) 3750 3751 if not self._match(TokenType.R_BRACKET) and bracket_kind == TokenType.L_BRACKET: 3752 self.raise_error("Expected ]") 3753 elif not self._match(TokenType.R_BRACE) and bracket_kind == TokenType.L_BRACE: 3754 self.raise_error("Expected }") 3755 3756 self._add_comments(this) 3757 return self._parse_bracket(this) 3758 3759 def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3760 if self._match(TokenType.COLON): 3761 return self.expression(exp.Slice, this=this, expression=self._parse_conjunction()) 3762 return this 3763 3764 def _parse_case(self) -> t.Optional[exp.Expression]: 3765 ifs = [] 3766 default = None 3767 3768 comments = self._prev_comments 3769 expression = self._parse_conjunction() 3770 3771 while self._match(TokenType.WHEN): 3772 this = self._parse_conjunction() 3773 self._match(TokenType.THEN) 3774 then = self._parse_conjunction() 3775 ifs.append(self.expression(exp.If, this=this, true=then)) 3776 3777 if self._match(TokenType.ELSE): 3778 default = self._parse_conjunction() 3779 3780 if not self._match(TokenType.END): 3781 self.raise_error("Expected END after CASE", self._prev) 3782 3783 return self._parse_window( 3784 self.expression(exp.Case, comments=comments, this=expression, ifs=ifs, default=default) 3785 ) 3786 3787 def _parse_if(self) -> t.Optional[exp.Expression]: 3788 if self._match(TokenType.L_PAREN): 3789 args = self._parse_csv(self._parse_conjunction) 3790 this = self.validate_expression(exp.If.from_arg_list(args), args) 3791 self._match_r_paren() 3792 else: 3793 index = self._index - 1 
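# The keyword form, e.g. IF x > 1 THEN 'a' ELSE 'b' END, as opposed to the
# parenthesized IF(x > 1, 'a', 'b') handled above; if no condition follows,
# we retreat to `index` below and return None so other parsers can try.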
3794 condition = self._parse_conjunction() 3795 3796 if not condition: 3797 self._retreat(index) 3798 return None 3799 3800 self._match(TokenType.THEN) 3801 true = self._parse_conjunction() 3802 false = self._parse_conjunction() if self._match(TokenType.ELSE) else None 3803 self._match(TokenType.END) 3804 this = self.expression(exp.If, this=condition, true=true, false=false) 3805 3806 return self._parse_window(this) 3807 3808 def _parse_next_value_for(self) -> t.Optional[exp.Expression]: 3809 if not self._match_text_seq("VALUE", "FOR"): 3810 self._retreat(self._index - 1) 3811 return None 3812 3813 return self.expression( 3814 exp.NextValueFor, 3815 this=self._parse_column(), 3816 order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order), 3817 ) 3818 3819 def _parse_extract(self) -> exp.Extract: 3820 this = self._parse_function() or self._parse_var() or self._parse_type() 3821 3822 if self._match(TokenType.FROM): 3823 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 3824 3825 if not self._match(TokenType.COMMA): 3826 self.raise_error("Expected FROM or comma after EXTRACT", self._prev) 3827 3828 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 3829 3830 def _parse_any_value(self) -> exp.AnyValue: 3831 this = self._parse_lambda() 3832 is_max = None 3833 having = None 3834 3835 if self._match(TokenType.HAVING): 3836 self._match_texts(("MAX", "MIN")) 3837 is_max = self._prev.text == "MAX" 3838 having = self._parse_column() 3839 3840 return self.expression(exp.AnyValue, this=this, having=having, max=is_max) 3841 3842 def _parse_cast(self, strict: bool) -> exp.Expression: 3843 this = self._parse_conjunction() 3844 3845 if not self._match(TokenType.ALIAS): 3846 if self._match(TokenType.COMMA): 3847 return self.expression( 3848 exp.CastToStrType, this=this, expression=self._parse_string() 3849 ) 3850 else: 3851 self.raise_error("Expected AS after CAST") 3852 3853 fmt = None 3854 to = self._parse_types() 3855 3856 if not to: 3857 self.raise_error("Expected TYPE after CAST") 3858 elif to.this == exp.DataType.Type.CHAR: 3859 if self._match(TokenType.CHARACTER_SET): 3860 to = self.expression(exp.CharacterSet, this=self._parse_var_or_string()) 3861 elif self._match(TokenType.FORMAT): 3862 fmt_string = self._parse_string() 3863 fmt = self._parse_at_time_zone(fmt_string) 3864 3865 if to.this in exp.DataType.TEMPORAL_TYPES: 3866 this = self.expression( 3867 exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime, 3868 this=this, 3869 format=exp.Literal.string( 3870 format_time( 3871 fmt_string.this if fmt_string else "", 3872 self.FORMAT_MAPPING or self.TIME_MAPPING, 3873 self.FORMAT_TRIE or self.TIME_TRIE, 3874 ) 3875 ), 3876 ) 3877 3878 if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime): 3879 this.set("zone", fmt.args["zone"]) 3880 3881 return this 3882 3883 return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, format=fmt) 3884 3885 def _parse_concat(self) -> t.Optional[exp.Expression]: 3886 args = self._parse_csv(self._parse_conjunction) 3887 if self.CONCAT_NULL_OUTPUTS_STRING: 3888 args = [ 3889 exp.func("COALESCE", exp.cast(arg, "text"), exp.Literal.string("")) 3890 for arg in args 3891 if arg 3892 ] 3893 3894 # Some dialects (e.g. Trino) don't allow a single-argument CONCAT call, so when 3895 # we find such a call we replace it with its argument. 
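# For example, CONCAT(a) collapses to just its argument. Note that when
# CONCAT_NULL_OUTPUTS_STRING is set, each argument was already rewritten
# above into COALESCE(CAST(arg AS TEXT), '').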
3896 if len(args) == 1: 3897 return args[0] 3898 3899 return self.expression( 3900 exp.Concat if self.STRICT_STRING_CONCAT else exp.SafeConcat, expressions=args 3901 ) 3902 3903 def _parse_string_agg(self) -> exp.Expression: 3904 if self._match(TokenType.DISTINCT): 3905 args: t.List[t.Optional[exp.Expression]] = [ 3906 self.expression(exp.Distinct, expressions=[self._parse_conjunction()]) 3907 ] 3908 if self._match(TokenType.COMMA): 3909 args.extend(self._parse_csv(self._parse_conjunction)) 3910 else: 3911 args = self._parse_csv(self._parse_conjunction) 3912 3913 index = self._index 3914 if not self._match(TokenType.R_PAREN) and args: 3915 # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]]) 3916 # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n]) 3917 args[-1] = self._parse_limit(this=self._parse_order(this=args[-1])) 3918 return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1)) 3919 3920 # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]). 3921 # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that 3922 # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them. 3923 if not self._match_text_seq("WITHIN", "GROUP"): 3924 self._retreat(index) 3925 return self.validate_expression(exp.GroupConcat.from_arg_list(args), args) 3926 3927 self._match_l_paren() # The corresponding match_r_paren will be called in parse_function (caller) 3928 order = self._parse_order(this=seq_get(args, 0)) 3929 return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1)) 3930 3931 def _parse_convert(self, strict: bool) -> t.Optional[exp.Expression]: 3932 this = self._parse_bitwise() 3933 3934 if self._match(TokenType.USING): 3935 to: t.Optional[exp.Expression] = self.expression( 3936 exp.CharacterSet, this=self._parse_var() 3937 ) 3938 elif self._match(TokenType.COMMA): 3939 to = self._parse_types() 3940 else: 3941 to = None 3942 3943 return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to) 3944 3945 def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]: 3946 """ 3947 There are generally two variants of the DECODE function: 3948 3949 - DECODE(bin, charset) 3950 - DECODE(expression, search, result [, search, result] ... [, default]) 3951 3952 The second variant will always be parsed into a CASE expression. Note that NULL 3953 needs special treatment, since we need to explicitly check for it with `IS NULL`, 3954 instead of relying on pattern matching. 
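For example, DECODE(x, 1, 'one', NULL, 'none', 'other') is parsed as
CASE WHEN x = 1 THEN 'one' WHEN x IS NULL THEN 'none' ELSE 'other' END.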
3955 """ 3956 args = self._parse_csv(self._parse_conjunction) 3957 3958 if len(args) < 3: 3959 return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1)) 3960 3961 expression, *expressions = args 3962 if not expression: 3963 return None 3964 3965 ifs = [] 3966 for search, result in zip(expressions[::2], expressions[1::2]): 3967 if not search or not result: 3968 return None 3969 3970 if isinstance(search, exp.Literal): 3971 ifs.append( 3972 exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result) 3973 ) 3974 elif isinstance(search, exp.Null): 3975 ifs.append( 3976 exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result) 3977 ) 3978 else: 3979 cond = exp.or_( 3980 exp.EQ(this=expression.copy(), expression=search), 3981 exp.and_( 3982 exp.Is(this=expression.copy(), expression=exp.Null()), 3983 exp.Is(this=search.copy(), expression=exp.Null()), 3984 copy=False, 3985 ), 3986 copy=False, 3987 ) 3988 ifs.append(exp.If(this=cond, true=result)) 3989 3990 return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None) 3991 3992 def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]: 3993 self._match_text_seq("KEY") 3994 key = self._parse_field() 3995 self._match(TokenType.COLON) 3996 self._match_text_seq("VALUE") 3997 value = self._parse_field() 3998 3999 if not key and not value: 4000 return None 4001 return self.expression(exp.JSONKeyValue, this=key, expression=value) 4002 4003 def _parse_json_object(self) -> exp.JSONObject: 4004 star = self._parse_star() 4005 expressions = [star] if star else self._parse_csv(self._parse_json_key_value) 4006 4007 null_handling = None 4008 if self._match_text_seq("NULL", "ON", "NULL"): 4009 null_handling = "NULL ON NULL" 4010 elif self._match_text_seq("ABSENT", "ON", "NULL"): 4011 null_handling = "ABSENT ON NULL" 4012 4013 unique_keys = None 4014 if self._match_text_seq("WITH", "UNIQUE"): 4015 unique_keys = True 4016 elif self._match_text_seq("WITHOUT", "UNIQUE"): 4017 unique_keys = False 4018 4019 self._match_text_seq("KEYS") 4020 4021 return_type = self._match_text_seq("RETURNING") and self._parse_type() 4022 format_json = self._match_text_seq("FORMAT", "JSON") 4023 encoding = self._match_text_seq("ENCODING") and self._parse_var() 4024 4025 return self.expression( 4026 exp.JSONObject, 4027 expressions=expressions, 4028 null_handling=null_handling, 4029 unique_keys=unique_keys, 4030 return_type=return_type, 4031 format_json=format_json, 4032 encoding=encoding, 4033 ) 4034 4035 def _parse_logarithm(self) -> exp.Func: 4036 # Default argument order is base, expression 4037 args = self._parse_csv(self._parse_range) 4038 4039 if len(args) > 1: 4040 if not self.LOG_BASE_FIRST: 4041 args.reverse() 4042 return exp.Log.from_arg_list(args) 4043 4044 return self.expression( 4045 exp.Ln if self.LOG_DEFAULTS_TO_LN else exp.Log, this=seq_get(args, 0) 4046 ) 4047 4048 def _parse_match_against(self) -> exp.MatchAgainst: 4049 expressions = self._parse_csv(self._parse_column) 4050 4051 self._match_text_seq(")", "AGAINST", "(") 4052 4053 this = self._parse_string() 4054 4055 if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"): 4056 modifier = "IN NATURAL LANGUAGE MODE" 4057 if self._match_text_seq("WITH", "QUERY", "EXPANSION"): 4058 modifier = f"{modifier} WITH QUERY EXPANSION" 4059 elif self._match_text_seq("IN", "BOOLEAN", "MODE"): 4060 modifier = "IN BOOLEAN MODE" 4061 elif self._match_text_seq("WITH", "QUERY", "EXPANSION"): 4062 modifier = "WITH QUERY EXPANSION" 4063 
else: 4064 modifier = None 4065 4066 return self.expression( 4067 exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier 4068 ) 4069 4070 # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16 4071 def _parse_open_json(self) -> exp.OpenJSON: 4072 this = self._parse_bitwise() 4073 path = self._match(TokenType.COMMA) and self._parse_string() 4074 4075 def _parse_open_json_column_def() -> exp.OpenJSONColumnDef: 4076 this = self._parse_field(any_token=True) 4077 kind = self._parse_types() 4078 path = self._parse_string() 4079 as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON) 4080 4081 return self.expression( 4082 exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json 4083 ) 4084 4085 expressions = None 4086 if self._match_pair(TokenType.R_PAREN, TokenType.WITH): 4087 self._match_l_paren() 4088 expressions = self._parse_csv(_parse_open_json_column_def) 4089 4090 return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions) 4091 4092 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 4093 args = self._parse_csv(self._parse_bitwise) 4094 4095 if self._match(TokenType.IN): 4096 return self.expression( 4097 exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0) 4098 ) 4099 4100 if haystack_first: 4101 haystack = seq_get(args, 0) 4102 needle = seq_get(args, 1) 4103 else: 4104 needle = seq_get(args, 0) 4105 haystack = seq_get(args, 1) 4106 4107 return self.expression( 4108 exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2) 4109 ) 4110 4111 def _parse_join_hint(self, func_name: str) -> exp.JoinHint: 4112 args = self._parse_csv(self._parse_table) 4113 return exp.JoinHint(this=func_name.upper(), expressions=args) 4114 4115 def _parse_substring(self) -> exp.Substring: 4116 # Postgres supports the form: substring(string [from int] [for int]) 4117 # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6 4118 4119 args = self._parse_csv(self._parse_bitwise) 4120 4121 if self._match(TokenType.FROM): 4122 args.append(self._parse_bitwise()) 4123 if self._match(TokenType.FOR): 4124 args.append(self._parse_bitwise()) 4125 4126 return self.validate_expression(exp.Substring.from_arg_list(args), args) 4127 4128 def _parse_trim(self) -> exp.Trim: 4129 # https://www.w3resource.com/sql/character-functions/trim.php 4130 # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html 4131 4132 position = None 4133 collation = None 4134 4135 if self._match_texts(self.TRIM_TYPES): 4136 position = self._prev.text.upper() 4137 4138 expression = self._parse_bitwise() 4139 if self._match_set((TokenType.FROM, TokenType.COMMA)): 4140 this = self._parse_bitwise() 4141 else: 4142 this = expression 4143 expression = None 4144 4145 if self._match(TokenType.COLLATE): 4146 collation = self._parse_bitwise() 4147 4148 return self.expression( 4149 exp.Trim, this=this, position=position, expression=expression, collation=collation 4150 ) 4151 4152 def _parse_window_clause(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]: 4153 return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window) 4154 4155 def _parse_named_window(self) -> t.Optional[exp.Expression]: 4156 return self._parse_window(self._parse_id_var(), alias=True) 4157 4158 def _parse_respect_or_ignore_nulls( 4159 self, this: t.Optional[exp.Expression] 4160 ) -> t.Optional[exp.Expression]: 4161 if self._match_text_seq("IGNORE", "NULLS"): 4162 return 
self.expression(exp.IgnoreNulls, this=this) 4163 if self._match_text_seq("RESPECT", "NULLS"): 4164 return self.expression(exp.RespectNulls, this=this) 4165 return this 4166 4167 def _parse_window( 4168 self, this: t.Optional[exp.Expression], alias: bool = False 4169 ) -> t.Optional[exp.Expression]: 4170 if self._match_pair(TokenType.FILTER, TokenType.L_PAREN): 4171 self._match(TokenType.WHERE) 4172 this = self.expression( 4173 exp.Filter, this=this, expression=self._parse_where(skip_where_token=True) 4174 ) 4175 self._match_r_paren() 4176 4177 # T-SQL allows the OVER (...) syntax after WITHIN GROUP. 4178 # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16 4179 if self._match_text_seq("WITHIN", "GROUP"): 4180 order = self._parse_wrapped(self._parse_order) 4181 this = self.expression(exp.WithinGroup, this=this, expression=order) 4182 4183 # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER 4184 # Some dialects choose to implement it and some do not. 4185 # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html 4186 4187 # There is some code above in _parse_lambda that handles 4188 # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ... 4189 4190 # The code below handles 4191 # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ... 4192 4193 # Oracle allows both formats 4194 # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html) 4195 # and Snowflake chose to do the same for familiarity 4196 # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes 4197 this = self._parse_respect_or_ignore_nulls(this) 4198 4199 # bigquery select from window x AS (partition by ...) 4200 if alias: 4201 over = None 4202 self._match(TokenType.ALIAS) 4203 elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS): 4204 return this 4205 else: 4206 over = self._prev.text.upper() 4207 4208 if not self._match(TokenType.L_PAREN): 4209 return self.expression( 4210 exp.Window, this=this, alias=self._parse_id_var(False), over=over 4211 ) 4212 4213 window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS) 4214 4215 first = self._match(TokenType.FIRST) 4216 if self._match_text_seq("LAST"): 4217 first = False 4218 4219 partition = self._parse_partition_by() 4220 order = self._parse_order() 4221 kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text 4222 4223 if kind: 4224 self._match(TokenType.BETWEEN) 4225 start = self._parse_window_spec() 4226 self._match(TokenType.AND) 4227 end = self._parse_window_spec() 4228 4229 spec = self.expression( 4230 exp.WindowSpec, 4231 kind=kind, 4232 start=start["value"], 4233 start_side=start["side"], 4234 end=end["value"], 4235 end_side=end["side"], 4236 ) 4237 else: 4238 spec = None 4239 4240 self._match_r_paren() 4241 4242 window = self.expression( 4243 exp.Window, 4244 this=this, 4245 partition_by=partition, 4246 order=order, 4247 spec=spec, 4248 alias=window_alias, 4249 over=over, 4250 first=first, 4251 ) 4252 4253 # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...)
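# e.g. MAX(salary) KEEP (DENSE_RANK FIRST ORDER BY hired) OVER (PARTITION BY dept):
# the KEEP (...) window was just built, and a trailing OVER (...) triggers the
# recursive call below, wrapping it in a second Window node.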
4254 if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False): 4255 return self._parse_window(window, alias=alias) 4256 4257 return window 4258 4259 def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]: 4260 self._match(TokenType.BETWEEN) 4261 4262 return { 4263 "value": ( 4264 (self._match_text_seq("UNBOUNDED") and "UNBOUNDED") 4265 or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW") 4266 or self._parse_bitwise() 4267 ), 4268 "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text, 4269 } 4270 4271 def _parse_alias( 4272 self, this: t.Optional[exp.Expression], explicit: bool = False 4273 ) -> t.Optional[exp.Expression]: 4274 any_token = self._match(TokenType.ALIAS) 4275 4276 if explicit and not any_token: 4277 return this 4278 4279 if self._match(TokenType.L_PAREN): 4280 aliases = self.expression( 4281 exp.Aliases, 4282 this=this, 4283 expressions=self._parse_csv(lambda: self._parse_id_var(any_token)), 4284 ) 4285 self._match_r_paren(aliases) 4286 return aliases 4287 4288 alias = self._parse_id_var(any_token) 4289 4290 if alias: 4291 return self.expression(exp.Alias, this=this, alias=alias) 4292 4293 return this 4294 4295 def _parse_id_var( 4296 self, 4297 any_token: bool = True, 4298 tokens: t.Optional[t.Collection[TokenType]] = None, 4299 ) -> t.Optional[exp.Expression]: 4300 identifier = self._parse_identifier() 4301 4302 if identifier: 4303 return identifier 4304 4305 if (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS): 4306 quoted = self._prev.token_type == TokenType.STRING 4307 return exp.Identifier(this=self._prev.text, quoted=quoted) 4308 4309 return None 4310 4311 def _parse_string(self) -> t.Optional[exp.Expression]: 4312 if self._match(TokenType.STRING): 4313 return self.PRIMARY_PARSERS[TokenType.STRING](self, self._prev) 4314 return self._parse_placeholder() 4315 4316 def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]: 4317 return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True) 4318 4319 def _parse_number(self) -> t.Optional[exp.Expression]: 4320 if self._match(TokenType.NUMBER): 4321 return self.PRIMARY_PARSERS[TokenType.NUMBER](self, self._prev) 4322 return self._parse_placeholder() 4323 4324 def _parse_identifier(self) -> t.Optional[exp.Expression]: 4325 if self._match(TokenType.IDENTIFIER): 4326 return self.expression(exp.Identifier, this=self._prev.text, quoted=True) 4327 return self._parse_placeholder() 4328 4329 def _parse_var( 4330 self, any_token: bool = False, tokens: t.Optional[t.Collection[TokenType]] = None 4331 ) -> t.Optional[exp.Expression]: 4332 if ( 4333 (any_token and self._advance_any()) 4334 or self._match(TokenType.VAR) 4335 or (self._match_set(tokens) if tokens else False) 4336 ): 4337 return self.expression(exp.Var, this=self._prev.text) 4338 return self._parse_placeholder() 4339 4340 def _advance_any(self) -> t.Optional[Token]: 4341 if self._curr and self._curr.token_type not in self.RESERVED_KEYWORDS: 4342 self._advance() 4343 return self._prev 4344 return None 4345 4346 def _parse_var_or_string(self) -> t.Optional[exp.Expression]: 4347 return self._parse_var() or self._parse_string() 4348 4349 def _parse_null(self) -> t.Optional[exp.Expression]: 4350 if self._match(TokenType.NULL): 4351 return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev) 4352 return self._parse_placeholder() 4353 4354 def _parse_boolean(self) -> t.Optional[exp.Expression]: 4355 if self._match(TokenType.TRUE): 4356 return 
self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev) 4357 if self._match(TokenType.FALSE): 4358 return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev) 4359 return self._parse_placeholder() 4360 4361 def _parse_star(self) -> t.Optional[exp.Expression]: 4362 if self._match(TokenType.STAR): 4363 return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev) 4364 return self._parse_placeholder() 4365 4366 def _parse_parameter(self) -> exp.Parameter: 4367 wrapped = self._match(TokenType.L_BRACE) 4368 this = self._parse_var() or self._parse_identifier() or self._parse_primary() 4369 self._match(TokenType.R_BRACE) 4370 return self.expression(exp.Parameter, this=this, wrapped=wrapped) 4371 4372 def _parse_placeholder(self) -> t.Optional[exp.Expression]: 4373 if self._match_set(self.PLACEHOLDER_PARSERS): 4374 placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self) 4375 if placeholder: 4376 return placeholder 4377 self._advance(-1) 4378 return None 4379 4380 def _parse_except(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]: 4381 if not self._match(TokenType.EXCEPT): 4382 return None 4383 if self._match(TokenType.L_PAREN, advance=False): 4384 return self._parse_wrapped_csv(self._parse_column) 4385 return self._parse_csv(self._parse_column) 4386 4387 def _parse_replace(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]: 4388 if not self._match(TokenType.REPLACE): 4389 return None 4390 if self._match(TokenType.L_PAREN, advance=False): 4391 return self._parse_wrapped_csv(self._parse_expression) 4392 return self._parse_expressions() 4393 4394 def _parse_csv( 4395 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA 4396 ) -> t.List[t.Optional[exp.Expression]]: 4397 parse_result = parse_method() 4398 items = [parse_result] if parse_result is not None else [] 4399 4400 while self._match(sep): 4401 self._add_comments(parse_result) 4402 parse_result = parse_method() 4403 if parse_result is not None: 4404 items.append(parse_result) 4405 4406 return items 4407 4408 def _parse_tokens( 4409 self, parse_method: t.Callable, expressions: t.Dict 4410 ) -> t.Optional[exp.Expression]: 4411 this = parse_method() 4412 4413 while self._match_set(expressions): 4414 this = self.expression( 4415 expressions[self._prev.token_type], 4416 this=this, 4417 comments=self._prev_comments, 4418 expression=parse_method(), 4419 ) 4420 4421 return this 4422 4423 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[t.Optional[exp.Expression]]: 4424 return self._parse_wrapped_csv(self._parse_id_var, optional=optional) 4425 4426 def _parse_wrapped_csv( 4427 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False 4428 ) -> t.List[t.Optional[exp.Expression]]: 4429 return self._parse_wrapped( 4430 lambda: self._parse_csv(parse_method, sep=sep), optional=optional 4431 ) 4432 4433 def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any: 4434 wrapped = self._match(TokenType.L_PAREN) 4435 if not wrapped and not optional: 4436 self.raise_error("Expecting (") 4437 parse_result = parse_method() 4438 if wrapped: 4439 self._match_r_paren() 4440 return parse_result 4441 4442 def _parse_expressions(self) -> t.List[t.Optional[exp.Expression]]: 4443 return self._parse_csv(self._parse_expression) 4444 4445 def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]: 4446 return self._parse_select() or self._parse_set_operations( 4447 self._parse_expression() if alias else self._parse_conjunction() 4448 ) 
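# Example of the CSV helpers above: _parse_wrapped_csv(self._parse_id_var)
# consumes "(a, b, c)" and returns three Identifier nodes; with optional=True
# the wrapping parentheses may be omitted, whereas _parse_wrapped otherwise
# raises "Expecting (".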
4449 4450 def _parse_ddl_select(self) -> t.Optional[exp.Expression]: 4451 return self._parse_query_modifiers( 4452 self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False)) 4453 ) 4454 4455 def _parse_transaction(self) -> exp.Transaction | exp.Command: 4456 this = None 4457 if self._match_texts(self.TRANSACTION_KIND): 4458 this = self._prev.text 4459 4460 self._match_texts({"TRANSACTION", "WORK"}) 4461 4462 modes = [] 4463 while True: 4464 mode = [] 4465 while self._match(TokenType.VAR): 4466 mode.append(self._prev.text) 4467 4468 if mode: 4469 modes.append(" ".join(mode)) 4470 if not self._match(TokenType.COMMA): 4471 break 4472 4473 return self.expression(exp.Transaction, this=this, modes=modes) 4474 4475 def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback: 4476 chain = None 4477 savepoint = None 4478 is_rollback = self._prev.token_type == TokenType.ROLLBACK 4479 4480 self._match_texts({"TRANSACTION", "WORK"}) 4481 4482 if self._match_text_seq("TO"): 4483 self._match_text_seq("SAVEPOINT") 4484 savepoint = self._parse_id_var() 4485 4486 if self._match(TokenType.AND): 4487 chain = not self._match_text_seq("NO") 4488 self._match_text_seq("CHAIN") 4489 4490 if is_rollback: 4491 return self.expression(exp.Rollback, savepoint=savepoint) 4492 4493 return self.expression(exp.Commit, chain=chain) 4494 4495 def _parse_add_column(self) -> t.Optional[exp.Expression]: 4496 if not self._match_text_seq("ADD"): 4497 return None 4498 4499 self._match(TokenType.COLUMN) 4500 exists_column = self._parse_exists(not_=True) 4501 expression = self._parse_column_def(self._parse_field(any_token=True)) 4502 4503 if expression: 4504 expression.set("exists", exists_column) 4505 4506 # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns 4507 if self._match_texts(("FIRST", "AFTER")): 4508 position = self._prev.text 4509 column_position = self.expression( 4510 exp.ColumnPosition, this=self._parse_column(), position=position 4511 ) 4512 expression.set("position", column_position) 4513 4514 return expression 4515 4516 def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]: 4517 drop = self._match(TokenType.DROP) and self._parse_drop() 4518 if drop and not isinstance(drop, exp.Command): 4519 drop.set("kind", drop.args.get("kind", "COLUMN")) 4520 return drop 4521 4522 # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html 4523 def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition: 4524 return self.expression( 4525 exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists 4526 ) 4527 4528 def _parse_add_constraint(self) -> exp.AddConstraint: 4529 this = None 4530 kind = self._prev.token_type 4531 4532 if kind == TokenType.CONSTRAINT: 4533 this = self._parse_id_var() 4534 4535 if self._match_text_seq("CHECK"): 4536 expression = self._parse_wrapped(self._parse_conjunction) 4537 enforced = self._match_text_seq("ENFORCED") 4538 4539 return self.expression( 4540 exp.AddConstraint, this=this, expression=expression, enforced=enforced 4541 ) 4542 4543 if kind == TokenType.FOREIGN_KEY or self._match(TokenType.FOREIGN_KEY): 4544 expression = self._parse_foreign_key() 4545 elif kind == TokenType.PRIMARY_KEY or self._match(TokenType.PRIMARY_KEY): 4546 expression = self._parse_primary_key() 4547 else: 4548 expression = None 4549 4550 return self.expression(exp.AddConstraint, this=this, expression=expression) 4551 4552 def _parse_alter_table_add(self) -> 
t.List[t.Optional[exp.Expression]]: 4553 index = self._index - 1 4554 4555 if self._match_set(self.ADD_CONSTRAINT_TOKENS): 4556 return self._parse_csv(self._parse_add_constraint) 4557 4558 self._retreat(index) 4559 return self._parse_csv(self._parse_add_column) 4560 4561 def _parse_alter_table_alter(self) -> exp.AlterColumn: 4562 self._match(TokenType.COLUMN) 4563 column = self._parse_field(any_token=True) 4564 4565 if self._match_pair(TokenType.DROP, TokenType.DEFAULT): 4566 return self.expression(exp.AlterColumn, this=column, drop=True) 4567 if self._match_pair(TokenType.SET, TokenType.DEFAULT): 4568 return self.expression(exp.AlterColumn, this=column, default=self._parse_conjunction()) 4569 4570 self._match_text_seq("SET", "DATA") 4571 return self.expression( 4572 exp.AlterColumn, 4573 this=column, 4574 dtype=self._match_text_seq("TYPE") and self._parse_types(), 4575 collate=self._match(TokenType.COLLATE) and self._parse_term(), 4576 using=self._match(TokenType.USING) and self._parse_conjunction(), 4577 ) 4578 4579 def _parse_alter_table_drop(self) -> t.List[t.Optional[exp.Expression]]: 4580 index = self._index - 1 4581 4582 partition_exists = self._parse_exists() 4583 if self._match(TokenType.PARTITION, advance=False): 4584 return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists)) 4585 4586 self._retreat(index) 4587 return self._parse_csv(self._parse_drop_column) 4588 4589 def _parse_alter_table_rename(self) -> exp.RenameTable: 4590 self._match_text_seq("TO") 4591 return self.expression(exp.RenameTable, this=self._parse_table(schema=True)) 4592 4593 def _parse_alter(self) -> exp.AlterTable | exp.Command: 4594 start = self._prev 4595 4596 if not self._match(TokenType.TABLE): 4597 return self._parse_as_command(start) 4598 4599 exists = self._parse_exists() 4600 this = self._parse_table(schema=True) 4601 4602 if self._next: 4603 self._advance() 4604 4605 parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None 4606 if parser: 4607 actions = ensure_list(parser(self)) 4608 4609 if not self._curr: 4610 return self.expression( 4611 exp.AlterTable, 4612 this=this, 4613 exists=exists, 4614 actions=actions, 4615 ) 4616 return self._parse_as_command(start) 4617 4618 def _parse_merge(self) -> exp.Merge: 4619 self._match(TokenType.INTO) 4620 target = self._parse_table() 4621 4622 self._match(TokenType.USING) 4623 using = self._parse_table() 4624 4625 self._match(TokenType.ON) 4626 on = self._parse_conjunction() 4627 4628 whens = [] 4629 while self._match(TokenType.WHEN): 4630 matched = not self._match(TokenType.NOT) 4631 self._match_text_seq("MATCHED") 4632 source = ( 4633 False 4634 if self._match_text_seq("BY", "TARGET") 4635 else self._match_text_seq("BY", "SOURCE") 4636 ) 4637 condition = self._parse_conjunction() if self._match(TokenType.AND) else None 4638 4639 self._match(TokenType.THEN) 4640 4641 if self._match(TokenType.INSERT): 4642 _this = self._parse_star() 4643 if _this: 4644 then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=_this) 4645 else: 4646 then = self.expression( 4647 exp.Insert, 4648 this=self._parse_value(), 4649 expression=self._match(TokenType.VALUES) and self._parse_value(), 4650 ) 4651 elif self._match(TokenType.UPDATE): 4652 expressions = self._parse_star() 4653 if expressions: 4654 then = self.expression(exp.Update, expressions=expressions) 4655 else: 4656 then = self.expression( 4657 exp.Update, 4658 expressions=self._match(TokenType.SET) 4659 and self._parse_csv(self._parse_equality), 4660 ) 4661 elif 
self._match(TokenType.DELETE): 4662 then = self.expression(exp.Var, this=self._prev.text) 4663 else: 4664 then = None 4665 4666 whens.append( 4667 self.expression( 4668 exp.When, 4669 matched=matched, 4670 source=source, 4671 condition=condition, 4672 then=then, 4673 ) 4674 ) 4675 4676 return self.expression( 4677 exp.Merge, 4678 this=target, 4679 using=using, 4680 on=on, 4681 expressions=whens, 4682 ) 4683 4684 def _parse_show(self) -> t.Optional[exp.Expression]: 4685 parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE) 4686 if parser: 4687 return parser(self) 4688 self._advance() 4689 return self.expression(exp.Show, this=self._prev.text.upper()) 4690 4691 def _parse_set_item_assignment( 4692 self, kind: t.Optional[str] = None 4693 ) -> t.Optional[exp.Expression]: 4694 index = self._index 4695 4696 if kind in {"GLOBAL", "SESSION"} and self._match_text_seq("TRANSACTION"): 4697 return self._parse_set_transaction(global_=kind == "GLOBAL") 4698 4699 left = self._parse_primary() or self._parse_id_var() 4700 4701 if not self._match_texts(("=", "TO")): 4702 self._retreat(index) 4703 return None 4704 4705 right = self._parse_statement() or self._parse_id_var() 4706 this = self.expression(exp.EQ, this=left, expression=right) 4707 4708 return self.expression(exp.SetItem, this=this, kind=kind) 4709 4710 def _parse_set_transaction(self, global_: bool = False) -> exp.Expression: 4711 self._match_text_seq("TRANSACTION") 4712 characteristics = self._parse_csv( 4713 lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS) 4714 ) 4715 return self.expression( 4716 exp.SetItem, 4717 expressions=characteristics, 4718 kind="TRANSACTION", 4719 **{"global": global_}, # type: ignore 4720 ) 4721 4722 def _parse_set_item(self) -> t.Optional[exp.Expression]: 4723 parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE) 4724 return parser(self) if parser else self._parse_set_item_assignment(kind=None) 4725 4726 def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command: 4727 index = self._index 4728 set_ = self.expression( 4729 exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag 4730 ) 4731 4732 if self._curr: 4733 self._retreat(index) 4734 return self._parse_as_command(self._prev) 4735 4736 return set_ 4737 4738 def _parse_var_from_options(self, options: t.Collection[str]) -> t.Optional[exp.Var]: 4739 for option in options: 4740 if self._match_text_seq(*option.split(" ")): 4741 return exp.var(option) 4742 return None 4743 4744 def _parse_as_command(self, start: Token) -> exp.Command: 4745 while self._curr: 4746 self._advance() 4747 text = self._find_sql(start, self._prev) 4748 size = len(start.text) 4749 return exp.Command(this=text[:size], expression=text[size:]) 4750 4751 def _parse_dict_property(self, this: str) -> exp.DictProperty: 4752 settings = [] 4753 4754 self._match_l_paren() 4755 kind = self._parse_id_var() 4756 4757 if self._match(TokenType.L_PAREN): 4758 while True: 4759 key = self._parse_id_var() 4760 value = self._parse_primary() 4761 4762 if not key and value is None: 4763 break 4764 settings.append(self.expression(exp.DictSubProperty, this=key, value=value)) 4765 self._match(TokenType.R_PAREN) 4766 4767 self._match_r_paren() 4768 4769 return self.expression( 4770 exp.DictProperty, 4771 this=this, 4772 kind=kind.this if kind else None, 4773 settings=settings, 4774 ) 4775 4776 def _parse_dict_range(self, this: str) -> exp.DictRange: 4777 self._match_l_paren() 4778 has_min = self._match_text_seq("MIN") 4779 if has_min: 
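# Range with explicit bounds, e.g. a ClickHouse-style LIFETIME(MIN 0 MAX 3600)
# property; when MIN is absent, the single value becomes MAX and MIN defaults
# to 0 in the else branch below.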
4780 min = self._parse_var() or self._parse_primary() 4781 self._match_text_seq("MAX") 4782 max = self._parse_var() or self._parse_primary() 4783 else: 4784 max = self._parse_var() or self._parse_primary() 4785 min = exp.Literal.number(0) 4786 self._match_r_paren() 4787 return self.expression(exp.DictRange, this=this, min=min, max=max) 4788 4789 def _find_parser( 4790 self, parsers: t.Dict[str, t.Callable], trie: t.Dict 4791 ) -> t.Optional[t.Callable]: 4792 if not self._curr: 4793 return None 4794 4795 index = self._index 4796 this = [] 4797 while True: 4798 # The current token might be multiple words 4799 curr = self._curr.text.upper() 4800 key = curr.split(" ") 4801 this.append(curr) 4802 4803 self._advance() 4804 result, trie = in_trie(trie, key) 4805 if result == TrieResult.FAILED: 4806 break 4807 4808 if result == TrieResult.EXISTS: 4809 subparser = parsers[" ".join(this)] 4810 return subparser 4811 4812 self._retreat(index) 4813 return None 4814 4815 def _match(self, token_type, advance=True, expression=None): 4816 if not self._curr: 4817 return None 4818 4819 if self._curr.token_type == token_type: 4820 if advance: 4821 self._advance() 4822 self._add_comments(expression) 4823 return True 4824 4825 return None 4826 4827 def _match_set(self, types, advance=True): 4828 if not self._curr: 4829 return None 4830 4831 if self._curr.token_type in types: 4832 if advance: 4833 self._advance() 4834 return True 4835 4836 return None 4837 4838 def _match_pair(self, token_type_a, token_type_b, advance=True): 4839 if not self._curr or not self._next: 4840 return None 4841 4842 if self._curr.token_type == token_type_a and self._next.token_type == token_type_b: 4843 if advance: 4844 self._advance(2) 4845 return True 4846 4847 return None 4848 4849 def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 4850 if not self._match(TokenType.L_PAREN, expression=expression): 4851 self.raise_error("Expecting (") 4852 4853 def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 4854 if not self._match(TokenType.R_PAREN, expression=expression): 4855 self.raise_error("Expecting )") 4856 4857 def _match_texts(self, texts, advance=True): 4858 if self._curr and self._curr.text.upper() in texts: 4859 if advance: 4860 self._advance() 4861 return True 4862 return False 4863 4864 def _match_text_seq(self, *texts, advance=True): 4865 index = self._index 4866 for text in texts: 4867 if self._curr and self._curr.text.upper() == text: 4868 self._advance() 4869 else: 4870 self._retreat(index) 4871 return False 4872 4873 if not advance: 4874 self._retreat(index) 4875 4876 return True 4877 4878 @t.overload 4879 def _replace_columns_with_dots(self, this: exp.Expression) -> exp.Expression: 4880 ... 4881 4882 @t.overload 4883 def _replace_columns_with_dots( 4884 self, this: t.Optional[exp.Expression] 4885 ) -> t.Optional[exp.Expression]: 4886 ... 
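# Example: Column(this=c, table=t) is rewritten to Dot(this=t, expression=c);
# replace_children applies the rewrite to the node's children first, so chained
# accesses collapse into nested Dot nodes.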
4887 4888 def _replace_columns_with_dots(self, this): 4889 if isinstance(this, exp.Dot): 4890 exp.replace_children(this, self._replace_columns_with_dots) 4891 elif isinstance(this, exp.Column): 4892 exp.replace_children(this, self._replace_columns_with_dots) 4893 table = this.args.get("table") 4894 this = ( 4895 self.expression(exp.Dot, this=table, expression=this.this) if table else this.this 4896 ) 4897 4898 return this 4899 4900 def _replace_lambda( 4901 self, node: t.Optional[exp.Expression], lambda_variables: t.Set[str] 4902 ) -> t.Optional[exp.Expression]: 4903 if not node: 4904 return node 4905 4906 for column in node.find_all(exp.Column): 4907 if column.parts[0].name in lambda_variables: 4908 dot_or_id = column.to_dot() if column.table else column.this 4909 parent = column.parent 4910 4911 while isinstance(parent, exp.Dot): 4912 if not isinstance(parent.parent, exp.Dot): 4913 parent.replace(dot_or_id) 4914 break 4915 parent = parent.parent 4916 else: 4917 if column is node: 4918 node = dot_or_id 4919 else: 4920 column.replace(dot_or_id) 4921 return node
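A minimal usage sketch (illustrative, not part of the module source; the SQL strings are arbitrary): the Parser above consumes tokens produced by the Tokenizer, which is exactly the pipeline that sqlglot.parse_one drives.

import sqlglot
from sqlglot import exp, parser, tokens

# High-level entry point: tokenize and parse in one call.
ast = sqlglot.parse_one("SELECT COALESCE(a, b) AS c FROM t")
assert isinstance(ast, exp.Select)

# The same pipeline, invoked manually.
raw = tokens.Tokenizer().tokenize("SELECT 1 + 2")
expr = parser.Parser().parse(raw)[0]
print(expr.sql())  # SELECT 1 + 2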
self, **kwargs: self._parse_log(**kwargs), 636 "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty), 637 "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs), 638 "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True), 639 "NO": lambda self: self._parse_no_property(), 640 "ON": lambda self: self._parse_on_property(), 641 "ORDER BY": lambda self: self._parse_order(skip_order_token=True), 642 "PARTITION BY": lambda self: self._parse_partitioned_by(), 643 "PARTITIONED BY": lambda self: self._parse_partitioned_by(), 644 "PARTITIONED_BY": lambda self: self._parse_partitioned_by(), 645 "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True), 646 "RANGE": lambda self: self._parse_dict_range(this="RANGE"), 647 "RETURNS": lambda self: self._parse_returns(), 648 "ROW": lambda self: self._parse_row(), 649 "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty), 650 "SET": lambda self: self.expression(exp.SetProperty, multi=False), 651 "SETTINGS": lambda self: self.expression( 652 exp.SettingsProperty, expressions=self._parse_csv(self._parse_set_item) 653 ), 654 "SORTKEY": lambda self: self._parse_sortkey(), 655 "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"), 656 "STABLE": lambda self: self.expression( 657 exp.StabilityProperty, this=exp.Literal.string("STABLE") 658 ), 659 "STORED": lambda self: self._parse_stored(), 660 "TBLPROPERTIES": lambda self: self._parse_wrapped_csv(self._parse_property), 661 "TEMP": lambda self: self.expression(exp.TemporaryProperty), 662 "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty), 663 "TO": lambda self: self._parse_to_table(), 664 "TRANSIENT": lambda self: self.expression(exp.TransientProperty), 665 "TTL": lambda self: self._parse_ttl(), 666 "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 667 "VOLATILE": lambda self: self._parse_volatile_property(), 668 "WITH": lambda self: self._parse_with_property(), 669 } 670 671 CONSTRAINT_PARSERS = { 672 "AUTOINCREMENT": lambda self: self._parse_auto_increment(), 673 "AUTO_INCREMENT": lambda self: self._parse_auto_increment(), 674 "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False), 675 "CHARACTER SET": lambda self: self.expression( 676 exp.CharacterSetColumnConstraint, this=self._parse_var_or_string() 677 ), 678 "CHECK": lambda self: self.expression( 679 exp.CheckColumnConstraint, this=self._parse_wrapped(self._parse_conjunction) 680 ), 681 "COLLATE": lambda self: self.expression( 682 exp.CollateColumnConstraint, this=self._parse_var() 683 ), 684 "COMMENT": lambda self: self.expression( 685 exp.CommentColumnConstraint, this=self._parse_string() 686 ), 687 "COMPRESS": lambda self: self._parse_compress(), 688 "DEFAULT": lambda self: self.expression( 689 exp.DefaultColumnConstraint, this=self._parse_bitwise() 690 ), 691 "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()), 692 "FOREIGN KEY": lambda self: self._parse_foreign_key(), 693 "FORMAT": lambda self: self.expression( 694 exp.DateFormatColumnConstraint, this=self._parse_var_or_string() 695 ), 696 "GENERATED": lambda self: self._parse_generated_as_identity(), 697 "IDENTITY": lambda self: self._parse_auto_increment(), 698 "INLINE": lambda self: self._parse_inline(), 699 "LIKE": lambda self: self._parse_create_like(), 700 "NOT": lambda self: self._parse_not_constraint(), 701 "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, 
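        # A bare NULL constraint is modeled as NOT NULL with allow_null=True
        # rather than as a separate node.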
allow_null=True), 702 "ON": lambda self: self._match(TokenType.UPDATE) 703 and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function()), 704 "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()), 705 "PRIMARY KEY": lambda self: self._parse_primary_key(), 706 "REFERENCES": lambda self: self._parse_references(match=False), 707 "TITLE": lambda self: self.expression( 708 exp.TitleColumnConstraint, this=self._parse_var_or_string() 709 ), 710 "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]), 711 "UNIQUE": lambda self: self._parse_unique(), 712 "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint), 713 } 714 715 ALTER_PARSERS = { 716 "ADD": lambda self: self._parse_alter_table_add(), 717 "ALTER": lambda self: self._parse_alter_table_alter(), 718 "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()), 719 "DROP": lambda self: self._parse_alter_table_drop(), 720 "RENAME": lambda self: self._parse_alter_table_rename(), 721 } 722 723 SCHEMA_UNNAMED_CONSTRAINTS = {"CHECK", "FOREIGN KEY", "LIKE", "PRIMARY KEY", "UNIQUE"} 724 725 NO_PAREN_FUNCTION_PARSERS = { 726 "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()), 727 "CASE": lambda self: self._parse_case(), 728 "IF": lambda self: self._parse_if(), 729 "NEXT": lambda self: self._parse_next_value_for(), 730 } 731 732 FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"} 733 734 FUNCTION_PARSERS = { 735 "ANY_VALUE": lambda self: self._parse_any_value(), 736 "CAST": lambda self: self._parse_cast(self.STRICT_CAST), 737 "CONCAT": lambda self: self._parse_concat(), 738 "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST), 739 "DECODE": lambda self: self._parse_decode(), 740 "EXTRACT": lambda self: self._parse_extract(), 741 "JSON_OBJECT": lambda self: self._parse_json_object(), 742 "LOG": lambda self: self._parse_logarithm(), 743 "MATCH": lambda self: self._parse_match_against(), 744 "OPENJSON": lambda self: self._parse_open_json(), 745 "POSITION": lambda self: self._parse_position(), 746 "SAFE_CAST": lambda self: self._parse_cast(False), 747 "STRING_AGG": lambda self: self._parse_string_agg(), 748 "SUBSTRING": lambda self: self._parse_substring(), 749 "TRIM": lambda self: self._parse_trim(), 750 "TRY_CAST": lambda self: self._parse_cast(False), 751 "TRY_CONVERT": lambda self: self._parse_convert(False), 752 } 753 754 QUERY_MODIFIER_PARSERS = { 755 TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()), 756 TokenType.WHERE: lambda self: ("where", self._parse_where()), 757 TokenType.GROUP_BY: lambda self: ("group", self._parse_group()), 758 TokenType.HAVING: lambda self: ("having", self._parse_having()), 759 TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()), 760 TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()), 761 TokenType.ORDER_BY: lambda self: ("order", self._parse_order()), 762 TokenType.LIMIT: lambda self: ("limit", self._parse_limit()), 763 TokenType.FETCH: lambda self: ("limit", self._parse_limit()), 764 TokenType.OFFSET: lambda self: ("offset", self._parse_offset()), 765 TokenType.FOR: lambda self: ("locks", self._parse_locks()), 766 TokenType.LOCK: lambda self: ("locks", self._parse_locks()), 767 TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 768 TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 769 TokenType.CLUSTER_BY: lambda self: ( 770 "cluster", 
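        # Each modifier parser returns an (arg_name, expression) pair that
        # _parse_query_modifiers sets on the enclosing statement.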
771 self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 772 ), 773 TokenType.DISTRIBUTE_BY: lambda self: ( 774 "distribute", 775 self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY), 776 ), 777 TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)), 778 } 779 780 SET_PARSERS = { 781 "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"), 782 "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"), 783 "SESSION": lambda self: self._parse_set_item_assignment("SESSION"), 784 "TRANSACTION": lambda self: self._parse_set_transaction(), 785 } 786 787 SHOW_PARSERS: t.Dict[str, t.Callable] = {} 788 789 TYPE_LITERAL_PARSERS: t.Dict[exp.DataType.Type, t.Callable] = {} 790 791 MODIFIABLES = (exp.Subquery, exp.Subqueryable, exp.Table) 792 793 DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN} 794 795 PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE} 796 797 TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"} 798 TRANSACTION_CHARACTERISTICS = { 799 "ISOLATION LEVEL REPEATABLE READ", 800 "ISOLATION LEVEL READ COMMITTED", 801 "ISOLATION LEVEL READ UNCOMMITTED", 802 "ISOLATION LEVEL SERIALIZABLE", 803 "READ WRITE", 804 "READ ONLY", 805 } 806 807 INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"} 808 809 CLONE_KINDS = {"TIMESTAMP", "OFFSET", "STATEMENT"} 810 811 TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE} 812 813 WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS} 814 WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER} 815 WINDOW_SIDES = {"FOLLOWING", "PRECEDING"} 816 817 ADD_CONSTRAINT_TOKENS = {TokenType.CONSTRAINT, TokenType.PRIMARY_KEY, TokenType.FOREIGN_KEY} 818 819 STRICT_CAST = True 820 821 # A NULL arg in CONCAT yields NULL by default 822 CONCAT_NULL_OUTPUTS_STRING = False 823 824 PREFIXED_PIVOT_COLUMNS = False 825 IDENTIFY_PIVOT_STRINGS = False 826 827 LOG_BASE_FIRST = True 828 LOG_DEFAULTS_TO_LN = False 829 830 __slots__ = ( 831 "error_level", 832 "error_message_context", 833 "max_errors", 834 "sql", 835 "errors", 836 "_tokens", 837 "_index", 838 "_curr", 839 "_next", 840 "_prev", 841 "_prev_comments", 842 ) 843 844 # Autofilled 845 INDEX_OFFSET: int = 0 846 UNNEST_COLUMN_ONLY: bool = False 847 ALIAS_POST_TABLESAMPLE: bool = False 848 STRICT_STRING_CONCAT = False 849 NORMALIZE_FUNCTIONS = "upper" 850 NULL_ORDERING: str = "nulls_are_small" 851 SHOW_TRIE: t.Dict = {} 852 SET_TRIE: t.Dict = {} 853 FORMAT_MAPPING: t.Dict[str, str] = {} 854 FORMAT_TRIE: t.Dict = {} 855 TIME_MAPPING: t.Dict[str, str] = {} 856 TIME_TRIE: t.Dict = {} 857 858 def __init__( 859 self, 860 error_level: t.Optional[ErrorLevel] = None, 861 error_message_context: int = 100, 862 max_errors: int = 3, 863 ): 864 self.error_level = error_level or ErrorLevel.IMMEDIATE 865 self.error_message_context = error_message_context 866 self.max_errors = max_errors 867 self.reset() 868 869 def reset(self): 870 self.sql = "" 871 self.errors = [] 872 self._tokens = [] 873 self._index = 0 874 self._curr = None 875 self._next = None 876 self._prev = None 877 self._prev_comments = None 878 879 def parse( 880 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 881 ) -> t.List[t.Optional[exp.Expression]]: 882 """ 883 Parses a list of tokens and returns a list of syntax trees, one tree 884 per parsed SQL statement. 885 886 Args: 887 raw_tokens: The list of tokens. 888 sql: The original SQL string, used to produce helpful debug messages. 
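        Example:
            A minimal sketch, assuming the default dialect settings:

            >>> tokens = Tokenizer().tokenize("SELECT 1; SELECT 2")
            >>> len(Parser().parse(tokens, sql="SELECT 1; SELECT 2"))
            2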
889 890 Returns: 891 The list of the produced syntax trees. 892 """ 893 return self._parse( 894 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 895 ) 896 897 def parse_into( 898 self, 899 expression_types: exp.IntoType, 900 raw_tokens: t.List[Token], 901 sql: t.Optional[str] = None, 902 ) -> t.List[t.Optional[exp.Expression]]: 903 """ 904 Parses a list of tokens into a given Expression type. If a collection of Expression 905 types is given instead, this method will try to parse the token list into each one 906 of them, stopping at the first for which the parsing succeeds. 907 908 Args: 909 expression_types: The expression type(s) to try to parse the token list into. 910 raw_tokens: The list of tokens. 911 sql: The original SQL string, used to produce helpful debug messages. 912 913 Returns: 914 The target Expression. 915 """ 916 errors = [] 917 for expression_type in ensure_list(expression_types): 918 parser = self.EXPRESSION_PARSERS.get(expression_type) 919 if not parser: 920 raise TypeError(f"No parser registered for {expression_type}") 921 922 try: 923 return self._parse(parser, raw_tokens, sql) 924 except ParseError as e: 925 e.errors[0]["into_expression"] = expression_type 926 errors.append(e) 927 928 raise ParseError( 929 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 930 errors=merge_errors(errors), 931 ) from errors[-1] 932 933 def _parse( 934 self, 935 parse_method: t.Callable[[Parser], t.Optional[exp.Expression]], 936 raw_tokens: t.List[Token], 937 sql: t.Optional[str] = None, 938 ) -> t.List[t.Optional[exp.Expression]]: 939 self.reset() 940 self.sql = sql or "" 941 942 total = len(raw_tokens) 943 chunks: t.List[t.List[Token]] = [[]] 944 945 for i, token in enumerate(raw_tokens): 946 if token.token_type == TokenType.SEMICOLON: 947 if i < total - 1: 948 chunks.append([]) 949 else: 950 chunks[-1].append(token) 951 952 expressions = [] 953 954 for tokens in chunks: 955 self._index = -1 956 self._tokens = tokens 957 self._advance() 958 959 expressions.append(parse_method(self)) 960 961 if self._index < len(self._tokens): 962 self.raise_error("Invalid expression / Unexpected token") 963 964 self.check_errors() 965 966 return expressions 967 968 def check_errors(self) -> None: 969 """Logs or raises any found errors, depending on the chosen error level setting.""" 970 if self.error_level == ErrorLevel.WARN: 971 for error in self.errors: 972 logger.error(str(error)) 973 elif self.error_level == ErrorLevel.RAISE and self.errors: 974 raise ParseError( 975 concat_messages(self.errors, self.max_errors), 976 errors=merge_errors(self.errors), 977 ) 978 979 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 980 """ 981 Appends an error to the list of recorded errors or raises it, depending on the chosen 982 error level setting. 983 """ 984 token = token or self._curr or self._prev or Token.string("") 985 start = token.start 986 end = token.end + 1 987 start_context = self.sql[max(start - self.error_message_context, 0) : start] 988 highlight = self.sql[start:end] 989 end_context = self.sql[end : end + self.error_message_context] 990 991 error = ParseError.new( 992 f"{message}.
Line {token.line}, Col: {token.col}.\n" 993 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 994 description=message, 995 line=token.line, 996 col=token.col, 997 start_context=start_context, 998 highlight=highlight, 999 end_context=end_context, 1000 ) 1001 1002 if self.error_level == ErrorLevel.IMMEDIATE: 1003 raise error 1004 1005 self.errors.append(error) 1006 1007 def expression( 1008 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 1009 ) -> E: 1010 """ 1011 Creates a new, validated Expression. 1012 1013 Args: 1014 exp_class: The expression class to instantiate. 1015 comments: An optional list of comments to attach to the expression. 1016 kwargs: The arguments to set for the expression along with their respective values. 1017 1018 Returns: 1019 The target expression. 1020 """ 1021 instance = exp_class(**kwargs) 1022 instance.add_comments(comments) if comments else self._add_comments(instance) 1023 return self.validate_expression(instance) 1024 1025 def _add_comments(self, expression: t.Optional[exp.Expression]) -> None: 1026 if expression and self._prev_comments: 1027 expression.add_comments(self._prev_comments) 1028 self._prev_comments = None 1029 1030 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1031 """ 1032 Validates an Expression, making sure that all its mandatory arguments are set. 1033 1034 Args: 1035 expression: The expression to validate. 1036 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1037 1038 Returns: 1039 The validated expression. 1040 """ 1041 if self.error_level != ErrorLevel.IGNORE: 1042 for error_message in expression.error_messages(args): 1043 self.raise_error(error_message) 1044 1045 return expression 1046 1047 def _find_sql(self, start: Token, end: Token) -> str: 1048 return self.sql[start.start : end.end + 1] 1049 1050 def _advance(self, times: int = 1) -> None: 1051 self._index += times 1052 self._curr = seq_get(self._tokens, self._index) 1053 self._next = seq_get(self._tokens, self._index + 1) 1054 1055 if self._index > 0: 1056 self._prev = self._tokens[self._index - 1] 1057 self._prev_comments = self._prev.comments 1058 else: 1059 self._prev = None 1060 self._prev_comments = None 1061 1062 def _retreat(self, index: int) -> None: 1063 if index != self._index: 1064 self._advance(index - self._index) 1065 1066 def _parse_command(self) -> exp.Command: 1067 return self.expression(exp.Command, this=self._prev.text, expression=self._parse_string()) 1068 1069 def _parse_comment(self, allow_exists: bool = True) -> exp.Expression: 1070 start = self._prev 1071 exists = self._parse_exists() if allow_exists else None 1072 1073 self._match(TokenType.ON) 1074 1075 kind = self._match_set(self.CREATABLES) and self._prev 1076 if not kind: 1077 return self._parse_as_command(start) 1078 1079 if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1080 this = self._parse_user_defined_function(kind=kind.token_type) 1081 elif kind.token_type == TokenType.TABLE: 1082 this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS) 1083 elif kind.token_type == TokenType.COLUMN: 1084 this = self._parse_column() 1085 else: 1086 this = self._parse_id_var() 1087 1088 self._match(TokenType.IS) 1089 1090 return self.expression( 1091 exp.Comment, this=this, kind=kind.text, expression=self._parse_string(), exists=exists 1092 ) 1093 1094 def _parse_to_table( 1095 self, 1096 ) -> exp.ToTableProperty: 1097 table = self._parse_table_parts(schema=True) 1098 
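        # e.g. the target table of a ClickHouse CREATE MATERIALIZED VIEW ... TO db.table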
return self.expression(exp.ToTableProperty, this=table) 1099 1100 # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl 1101 def _parse_ttl(self) -> exp.Expression: 1102 def _parse_ttl_action() -> t.Optional[exp.Expression]: 1103 this = self._parse_bitwise() 1104 1105 if self._match_text_seq("DELETE"): 1106 return self.expression(exp.MergeTreeTTLAction, this=this, delete=True) 1107 if self._match_text_seq("RECOMPRESS"): 1108 return self.expression( 1109 exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise() 1110 ) 1111 if self._match_text_seq("TO", "DISK"): 1112 return self.expression( 1113 exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string() 1114 ) 1115 if self._match_text_seq("TO", "VOLUME"): 1116 return self.expression( 1117 exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string() 1118 ) 1119 1120 return this 1121 1122 expressions = self._parse_csv(_parse_ttl_action) 1123 where = self._parse_where() 1124 group = self._parse_group() 1125 1126 aggregates = None 1127 if group and self._match(TokenType.SET): 1128 aggregates = self._parse_csv(self._parse_set_item) 1129 1130 return self.expression( 1131 exp.MergeTreeTTL, 1132 expressions=expressions, 1133 where=where, 1134 group=group, 1135 aggregates=aggregates, 1136 ) 1137 1138 def _parse_statement(self) -> t.Optional[exp.Expression]: 1139 if self._curr is None: 1140 return None 1141 1142 if self._match_set(self.STATEMENT_PARSERS): 1143 return self.STATEMENT_PARSERS[self._prev.token_type](self) 1144 1145 if self._match_set(Tokenizer.COMMANDS): 1146 return self._parse_command() 1147 1148 expression = self._parse_expression() 1149 expression = self._parse_set_operations(expression) if expression else self._parse_select() 1150 return self._parse_query_modifiers(expression) 1151 1152 def _parse_drop(self) -> exp.Drop | exp.Command: 1153 start = self._prev 1154 temporary = self._match(TokenType.TEMPORARY) 1155 materialized = self._match_text_seq("MATERIALIZED") 1156 1157 kind = self._match_set(self.CREATABLES) and self._prev.text 1158 if not kind: 1159 return self._parse_as_command(start) 1160 1161 return self.expression( 1162 exp.Drop, 1163 comments=start.comments, 1164 exists=self._parse_exists(), 1165 this=self._parse_table(schema=True), 1166 kind=kind, 1167 temporary=temporary, 1168 materialized=materialized, 1169 cascade=self._match_text_seq("CASCADE"), 1170 constraints=self._match_text_seq("CONSTRAINTS"), 1171 purge=self._match_text_seq("PURGE"), 1172 ) 1173 1174 def _parse_exists(self, not_: bool = False) -> t.Optional[bool]: 1175 return ( 1176 self._match_text_seq("IF") 1177 and (not not_ or self._match(TokenType.NOT)) 1178 and self._match(TokenType.EXISTS) 1179 ) 1180 1181 def _parse_create(self) -> exp.Create | exp.Command: 1182 # Note: this can't be None because we've matched a statement parser 1183 start = self._prev 1184 comments = self._prev_comments 1185 1186 replace = start.text.upper() == "REPLACE" or self._match_pair( 1187 TokenType.OR, TokenType.REPLACE 1188 ) 1189 unique = self._match(TokenType.UNIQUE) 1190 1191 if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False): 1192 self._advance() 1193 1194 properties = None 1195 create_token = self._match_set(self.CREATABLES) and self._prev 1196 1197 if not create_token: 1198 # exp.Properties.Location.POST_CREATE 1199 properties = self._parse_properties() 1200 create_token = self._match_set(self.CREATABLES) and self._prev 1201 1202 if not properties or not create_token: 1203 return 
self._parse_as_command(start) 1204 1205 exists = self._parse_exists(not_=True) 1206 this = None 1207 expression: t.Optional[exp.Expression] = None 1208 indexes = None 1209 no_schema_binding = None 1210 begin = None 1211 clone = None 1212 1213 def extend_props(temp_props: t.Optional[exp.Properties]) -> None: 1214 nonlocal properties 1215 if properties and temp_props: 1216 properties.expressions.extend(temp_props.expressions) 1217 elif temp_props: 1218 properties = temp_props 1219 1220 if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1221 this = self._parse_user_defined_function(kind=create_token.token_type) 1222 1223 # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature) 1224 extend_props(self._parse_properties()) 1225 1226 self._match(TokenType.ALIAS) 1227 1228 if self._match(TokenType.COMMAND): 1229 expression = self._parse_as_command(self._prev) 1230 else: 1231 begin = self._match(TokenType.BEGIN) 1232 return_ = self._match_text_seq("RETURN") 1233 expression = self._parse_statement() 1234 1235 if return_: 1236 expression = self.expression(exp.Return, this=expression) 1237 elif create_token.token_type == TokenType.INDEX: 1238 this = self._parse_index(index=self._parse_id_var()) 1239 elif create_token.token_type in self.DB_CREATABLES: 1240 table_parts = self._parse_table_parts(schema=True) 1241 1242 # exp.Properties.Location.POST_NAME 1243 self._match(TokenType.COMMA) 1244 extend_props(self._parse_properties(before=True)) 1245 1246 this = self._parse_schema(this=table_parts) 1247 1248 # exp.Properties.Location.POST_SCHEMA and POST_WITH 1249 extend_props(self._parse_properties()) 1250 1251 self._match(TokenType.ALIAS) 1252 if not self._match_set(self.DDL_SELECT_TOKENS, advance=False): 1253 # exp.Properties.Location.POST_ALIAS 1254 extend_props(self._parse_properties()) 1255 1256 expression = self._parse_ddl_select() 1257 1258 if create_token.token_type == TokenType.TABLE: 1259 # exp.Properties.Location.POST_EXPRESSION 1260 extend_props(self._parse_properties()) 1261 1262 indexes = [] 1263 while True: 1264 index = self._parse_index() 1265 1266 # exp.Properties.Location.POST_INDEX 1267 extend_props(self._parse_properties()) 1268 1269 if not index: 1270 break 1271 else: 1272 self._match(TokenType.COMMA) 1273 indexes.append(index) 1274 elif create_token.token_type == TokenType.VIEW: 1275 if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"): 1276 no_schema_binding = True 1277 1278 if self._match_text_seq("CLONE"): 1279 clone = self._parse_table(schema=True) 1280 when = self._match_texts({"AT", "BEFORE"}) and self._prev.text.upper() 1281 clone_kind = ( 1282 self._match(TokenType.L_PAREN) 1283 and self._match_texts(self.CLONE_KINDS) 1284 and self._prev.text.upper() 1285 ) 1286 clone_expression = self._match(TokenType.FARROW) and self._parse_bitwise() 1287 self._match(TokenType.R_PAREN) 1288 clone = self.expression( 1289 exp.Clone, this=clone, when=when, kind=clone_kind, expression=clone_expression 1290 ) 1291 1292 return self.expression( 1293 exp.Create, 1294 comments=comments, 1295 this=this, 1296 kind=create_token.text, 1297 replace=replace, 1298 unique=unique, 1299 expression=expression, 1300 exists=exists, 1301 properties=properties, 1302 indexes=indexes, 1303 no_schema_binding=no_schema_binding, 1304 begin=begin, 1305 clone=clone, 1306 ) 1307 1308 def _parse_property_before(self) -> t.Optional[exp.Expression]: 1309 # only used for teradata currently 1310 self._match(TokenType.COMMA) 1311 1312 kwargs = { 1313 "no": 
self._match_text_seq("NO"), 1314 "dual": self._match_text_seq("DUAL"), 1315 "before": self._match_text_seq("BEFORE"), 1316 "default": self._match_text_seq("DEFAULT"), 1317 "local": (self._match_text_seq("LOCAL") and "LOCAL") 1318 or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"), 1319 "after": self._match_text_seq("AFTER"), 1320 "minimum": self._match_texts(("MIN", "MINIMUM")), 1321 "maximum": self._match_texts(("MAX", "MAXIMUM")), 1322 } 1323 1324 if self._match_texts(self.PROPERTY_PARSERS): 1325 parser = self.PROPERTY_PARSERS[self._prev.text.upper()] 1326 try: 1327 return parser(self, **{k: v for k, v in kwargs.items() if v}) 1328 except TypeError: 1329 self.raise_error(f"Cannot parse property '{self._prev.text}'") 1330 1331 return None 1332 1333 def _parse_property(self) -> t.Optional[exp.Expression]: 1334 if self._match_texts(self.PROPERTY_PARSERS): 1335 return self.PROPERTY_PARSERS[self._prev.text.upper()](self) 1336 1337 if self._match_pair(TokenType.DEFAULT, TokenType.CHARACTER_SET): 1338 return self._parse_character_set(default=True) 1339 1340 if self._match_text_seq("COMPOUND", "SORTKEY"): 1341 return self._parse_sortkey(compound=True) 1342 1343 if self._match_text_seq("SQL", "SECURITY"): 1344 return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER")) 1345 1346 assignment = self._match_pair( 1347 TokenType.VAR, TokenType.EQ, advance=False 1348 ) or self._match_pair(TokenType.STRING, TokenType.EQ, advance=False) 1349 1350 if assignment: 1351 key = self._parse_var_or_string() 1352 self._match(TokenType.EQ) 1353 return self.expression(exp.Property, this=key, value=self._parse_column()) 1354 1355 return None 1356 1357 def _parse_stored(self) -> exp.FileFormatProperty: 1358 self._match(TokenType.ALIAS) 1359 1360 input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None 1361 output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None 1362 1363 return self.expression( 1364 exp.FileFormatProperty, 1365 this=self.expression( 1366 exp.InputOutputFormat, input_format=input_format, output_format=output_format 1367 ) 1368 if input_format or output_format 1369 else self._parse_var_or_string() or self._parse_number() or self._parse_id_var(), 1370 ) 1371 1372 def _parse_property_assignment(self, exp_class: t.Type[E]) -> E: 1373 self._match(TokenType.EQ) 1374 self._match(TokenType.ALIAS) 1375 return self.expression(exp_class, this=self._parse_field()) 1376 1377 def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]: 1378 properties = [] 1379 while True: 1380 if before: 1381 prop = self._parse_property_before() 1382 else: 1383 prop = self._parse_property() 1384 1385 if not prop: 1386 break 1387 for p in ensure_list(prop): 1388 properties.append(p) 1389 1390 if properties: 1391 return self.expression(exp.Properties, expressions=properties) 1392 1393 return None 1394 1395 def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty: 1396 return self.expression( 1397 exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION") 1398 ) 1399 1400 def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty: 1401 if self._index >= 2: 1402 pre_volatile_token = self._tokens[self._index - 2] 1403 else: 1404 pre_volatile_token = None 1405 1406 if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS: 1407 return exp.VolatileProperty() 1408 1409 return self.expression(exp.StabilityProperty, 
this=exp.Literal.string("VOLATILE")) 1410 1411 def _parse_with_property( 1412 self, 1413 ) -> t.Optional[exp.Expression] | t.List[t.Optional[exp.Expression]]: 1414 if self._match(TokenType.L_PAREN, advance=False): 1415 return self._parse_wrapped_csv(self._parse_property) 1416 1417 if self._match_text_seq("JOURNAL"): 1418 return self._parse_withjournaltable() 1419 1420 if self._match_text_seq("DATA"): 1421 return self._parse_withdata(no=False) 1422 elif self._match_text_seq("NO", "DATA"): 1423 return self._parse_withdata(no=True) 1424 1425 if not self._next: 1426 return None 1427 1428 return self._parse_withisolatedloading() 1429 1430 # https://dev.mysql.com/doc/refman/8.0/en/create-view.html 1431 def _parse_definer(self) -> t.Optional[exp.DefinerProperty]: 1432 self._match(TokenType.EQ) 1433 1434 user = self._parse_id_var() 1435 self._match(TokenType.PARAMETER) 1436 host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text) 1437 1438 if not user or not host: 1439 return None 1440 1441 return exp.DefinerProperty(this=f"{user}@{host}") 1442 1443 def _parse_withjournaltable(self) -> exp.WithJournalTableProperty: 1444 self._match(TokenType.TABLE) 1445 self._match(TokenType.EQ) 1446 return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts()) 1447 1448 def _parse_log(self, no: bool = False) -> exp.LogProperty: 1449 return self.expression(exp.LogProperty, no=no) 1450 1451 def _parse_journal(self, **kwargs) -> exp.JournalProperty: 1452 return self.expression(exp.JournalProperty, **kwargs) 1453 1454 def _parse_checksum(self) -> exp.ChecksumProperty: 1455 self._match(TokenType.EQ) 1456 1457 on = None 1458 if self._match(TokenType.ON): 1459 on = True 1460 elif self._match_text_seq("OFF"): 1461 on = False 1462 1463 return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT)) 1464 1465 def _parse_cluster(self) -> exp.Cluster: 1466 return self.expression(exp.Cluster, expressions=self._parse_csv(self._parse_ordered)) 1467 1468 def _parse_clustered_by(self) -> exp.ClusteredByProperty: 1469 self._match_text_seq("BY") 1470 1471 self._match_l_paren() 1472 expressions = self._parse_csv(self._parse_column) 1473 self._match_r_paren() 1474 1475 if self._match_text_seq("SORTED", "BY"): 1476 self._match_l_paren() 1477 sorted_by = self._parse_csv(self._parse_ordered) 1478 self._match_r_paren() 1479 else: 1480 sorted_by = None 1481 1482 self._match(TokenType.INTO) 1483 buckets = self._parse_number() 1484 self._match_text_seq("BUCKETS") 1485 1486 return self.expression( 1487 exp.ClusteredByProperty, 1488 expressions=expressions, 1489 sorted_by=sorted_by, 1490 buckets=buckets, 1491 ) 1492 1493 def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]: 1494 if not self._match_text_seq("GRANTS"): 1495 self._retreat(self._index - 1) 1496 return None 1497 1498 return self.expression(exp.CopyGrantsProperty) 1499 1500 def _parse_freespace(self) -> exp.FreespaceProperty: 1501 self._match(TokenType.EQ) 1502 return self.expression( 1503 exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT) 1504 ) 1505 1506 def _parse_mergeblockratio( 1507 self, no: bool = False, default: bool = False 1508 ) -> exp.MergeBlockRatioProperty: 1509 if self._match(TokenType.EQ): 1510 return self.expression( 1511 exp.MergeBlockRatioProperty, 1512 this=self._parse_number(), 1513 percent=self._match(TokenType.PERCENT), 1514 ) 1515 1516 return self.expression(exp.MergeBlockRatioProperty, no=no, default=default) 1517 1518 def 
_parse_datablocksize( 1519 self, 1520 default: t.Optional[bool] = None, 1521 minimum: t.Optional[bool] = None, 1522 maximum: t.Optional[bool] = None, 1523 ) -> exp.DataBlocksizeProperty: 1524 self._match(TokenType.EQ) 1525 size = self._parse_number() 1526 1527 units = None 1528 if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")): 1529 units = self._prev.text 1530 1531 return self.expression( 1532 exp.DataBlocksizeProperty, 1533 size=size, 1534 units=units, 1535 default=default, 1536 minimum=minimum, 1537 maximum=maximum, 1538 ) 1539 1540 def _parse_blockcompression(self) -> exp.BlockCompressionProperty: 1541 self._match(TokenType.EQ) 1542 always = self._match_text_seq("ALWAYS") 1543 manual = self._match_text_seq("MANUAL") 1544 never = self._match_text_seq("NEVER") 1545 default = self._match_text_seq("DEFAULT") 1546 1547 autotemp = None 1548 if self._match_text_seq("AUTOTEMP"): 1549 autotemp = self._parse_schema() 1550 1551 return self.expression( 1552 exp.BlockCompressionProperty, 1553 always=always, 1554 manual=manual, 1555 never=never, 1556 default=default, 1557 autotemp=autotemp, 1558 ) 1559 1560 def _parse_withisolatedloading(self) -> exp.IsolatedLoadingProperty: 1561 no = self._match_text_seq("NO") 1562 concurrent = self._match_text_seq("CONCURRENT") 1563 self._match_text_seq("ISOLATED", "LOADING") 1564 for_all = self._match_text_seq("FOR", "ALL") 1565 for_insert = self._match_text_seq("FOR", "INSERT") 1566 for_none = self._match_text_seq("FOR", "NONE") 1567 return self.expression( 1568 exp.IsolatedLoadingProperty, 1569 no=no, 1570 concurrent=concurrent, 1571 for_all=for_all, 1572 for_insert=for_insert, 1573 for_none=for_none, 1574 ) 1575 1576 def _parse_locking(self) -> exp.LockingProperty: 1577 if self._match(TokenType.TABLE): 1578 kind = "TABLE" 1579 elif self._match(TokenType.VIEW): 1580 kind = "VIEW" 1581 elif self._match(TokenType.ROW): 1582 kind = "ROW" 1583 elif self._match_text_seq("DATABASE"): 1584 kind = "DATABASE" 1585 else: 1586 kind = None 1587 1588 if kind in ("DATABASE", "TABLE", "VIEW"): 1589 this = self._parse_table_parts() 1590 else: 1591 this = None 1592 1593 if self._match(TokenType.FOR): 1594 for_or_in = "FOR" 1595 elif self._match(TokenType.IN): 1596 for_or_in = "IN" 1597 else: 1598 for_or_in = None 1599 1600 if self._match_text_seq("ACCESS"): 1601 lock_type = "ACCESS" 1602 elif self._match_texts(("EXCL", "EXCLUSIVE")): 1603 lock_type = "EXCLUSIVE" 1604 elif self._match_text_seq("SHARE"): 1605 lock_type = "SHARE" 1606 elif self._match_text_seq("READ"): 1607 lock_type = "READ" 1608 elif self._match_text_seq("WRITE"): 1609 lock_type = "WRITE" 1610 elif self._match_text_seq("CHECKSUM"): 1611 lock_type = "CHECKSUM" 1612 else: 1613 lock_type = None 1614 1615 override = self._match_text_seq("OVERRIDE") 1616 1617 return self.expression( 1618 exp.LockingProperty, 1619 this=this, 1620 kind=kind, 1621 for_or_in=for_or_in, 1622 lock_type=lock_type, 1623 override=override, 1624 ) 1625 1626 def _parse_partition_by(self) -> t.List[t.Optional[exp.Expression]]: 1627 if self._match(TokenType.PARTITION_BY): 1628 return self._parse_csv(self._parse_conjunction) 1629 return [] 1630 1631 def _parse_partitioned_by(self) -> exp.PartitionedByProperty: 1632 self._match(TokenType.EQ) 1633 return self.expression( 1634 exp.PartitionedByProperty, 1635 this=self._parse_schema() or self._parse_bracket(self._parse_field()), 1636 ) 1637 1638 def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty: 1639 if self._match_text_seq("AND", "STATISTICS"): 1640 statistics = True 1641 elif 
self._match_text_seq("AND", "NO", "STATISTICS"): 1642 statistics = False 1643 else: 1644 statistics = None 1645 1646 return self.expression(exp.WithDataProperty, no=no, statistics=statistics) 1647 1648 def _parse_no_property(self) -> t.Optional[exp.NoPrimaryIndexProperty]: 1649 if self._match_text_seq("PRIMARY", "INDEX"): 1650 return exp.NoPrimaryIndexProperty() 1651 return None 1652 1653 def _parse_on_property(self) -> t.Optional[exp.Expression]: 1654 if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"): 1655 return exp.OnCommitProperty() 1656 elif self._match_text_seq("COMMIT", "DELETE", "ROWS"): 1657 return exp.OnCommitProperty(delete=True) 1658 return None 1659 1660 def _parse_distkey(self) -> exp.DistKeyProperty: 1661 return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var)) 1662 1663 def _parse_create_like(self) -> t.Optional[exp.LikeProperty]: 1664 table = self._parse_table(schema=True) 1665 1666 options = [] 1667 while self._match_texts(("INCLUDING", "EXCLUDING")): 1668 this = self._prev.text.upper() 1669 1670 id_var = self._parse_id_var() 1671 if not id_var: 1672 return None 1673 1674 options.append( 1675 self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper())) 1676 ) 1677 1678 return self.expression(exp.LikeProperty, this=table, expressions=options) 1679 1680 def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty: 1681 return self.expression( 1682 exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound 1683 ) 1684 1685 def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty: 1686 self._match(TokenType.EQ) 1687 return self.expression( 1688 exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default 1689 ) 1690 1691 def _parse_returns(self) -> exp.ReturnsProperty: 1692 value: t.Optional[exp.Expression] 1693 is_table = self._match(TokenType.TABLE) 1694 1695 if is_table: 1696 if self._match(TokenType.LT): 1697 value = self.expression( 1698 exp.Schema, 1699 this="TABLE", 1700 expressions=self._parse_csv(self._parse_struct_types), 1701 ) 1702 if not self._match(TokenType.GT): 1703 self.raise_error("Expecting >") 1704 else: 1705 value = self._parse_schema(exp.var("TABLE")) 1706 else: 1707 value = self._parse_types() 1708 1709 return self.expression(exp.ReturnsProperty, this=value, is_table=is_table) 1710 1711 def _parse_describe(self) -> exp.Describe: 1712 kind = self._match_set(self.CREATABLES) and self._prev.text 1713 this = self._parse_table() 1714 return self.expression(exp.Describe, this=this, kind=kind) 1715 1716 def _parse_insert(self) -> exp.Insert: 1717 comments = ensure_list(self._prev_comments) 1718 overwrite = self._match(TokenType.OVERWRITE) 1719 ignore = self._match(TokenType.IGNORE) 1720 local = self._match_text_seq("LOCAL") 1721 alternative = None 1722 1723 if self._match_text_seq("DIRECTORY"): 1724 this: t.Optional[exp.Expression] = self.expression( 1725 exp.Directory, 1726 this=self._parse_var_or_string(), 1727 local=local, 1728 row_format=self._parse_row_format(match_row=True), 1729 ) 1730 else: 1731 if self._match(TokenType.OR): 1732 alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text 1733 1734 self._match(TokenType.INTO) 1735 comments += ensure_list(self._prev_comments) 1736 self._match(TokenType.TABLE) 1737 this = self._parse_table(schema=True) 1738 1739 returning = self._parse_returning() 1740 1741 return self.expression( 1742 exp.Insert, 1743 comments=comments, 1744 this=this, 1745 
exists=self._parse_exists(), 1746 partition=self._parse_partition(), 1747 where=self._match_pair(TokenType.REPLACE, TokenType.WHERE) 1748 and self._parse_conjunction(), 1749 expression=self._parse_ddl_select(), 1750 conflict=self._parse_on_conflict(), 1751 returning=returning or self._parse_returning(), 1752 overwrite=overwrite, 1753 alternative=alternative, 1754 ignore=ignore, 1755 ) 1756 1757 def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]: 1758 conflict = self._match_text_seq("ON", "CONFLICT") 1759 duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY") 1760 1761 if not conflict and not duplicate: 1762 return None 1763 1764 nothing = None 1765 expressions = None 1766 key = None 1767 constraint = None 1768 1769 if conflict: 1770 if self._match_text_seq("ON", "CONSTRAINT"): 1771 constraint = self._parse_id_var() 1772 else: 1773 key = self._parse_csv(self._parse_value) 1774 1775 self._match_text_seq("DO") 1776 if self._match_text_seq("NOTHING"): 1777 nothing = True 1778 else: 1779 self._match(TokenType.UPDATE) 1780 self._match(TokenType.SET) 1781 expressions = self._parse_csv(self._parse_equality) 1782 1783 return self.expression( 1784 exp.OnConflict, 1785 duplicate=duplicate, 1786 expressions=expressions, 1787 nothing=nothing, 1788 key=key, 1789 constraint=constraint, 1790 ) 1791 1792 def _parse_returning(self) -> t.Optional[exp.Returning]: 1793 if not self._match(TokenType.RETURNING): 1794 return None 1795 return self.expression( 1796 exp.Returning, 1797 expressions=self._parse_csv(self._parse_expression), 1798 into=self._match(TokenType.INTO) and self._parse_table_part(), 1799 ) 1800 1801 def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 1802 if not self._match(TokenType.FORMAT): 1803 return None 1804 return self._parse_row_format() 1805 1806 def _parse_row_format( 1807 self, match_row: bool = False 1808 ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 1809 if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT): 1810 return None 1811 1812 if self._match_text_seq("SERDE"): 1813 this = self._parse_string() 1814 1815 serde_properties = None 1816 if self._match(TokenType.SERDE_PROPERTIES): 1817 serde_properties = self.expression( 1818 exp.SerdeProperties, expressions=self._parse_wrapped_csv(self._parse_property) 1819 ) 1820 1821 return self.expression( 1822 exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties 1823 ) 1824 1825 self._match_text_seq("DELIMITED") 1826 1827 kwargs = {} 1828 1829 if self._match_text_seq("FIELDS", "TERMINATED", "BY"): 1830 kwargs["fields"] = self._parse_string() 1831 if self._match_text_seq("ESCAPED", "BY"): 1832 kwargs["escaped"] = self._parse_string() 1833 if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"): 1834 kwargs["collection_items"] = self._parse_string() 1835 if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"): 1836 kwargs["map_keys"] = self._parse_string() 1837 if self._match_text_seq("LINES", "TERMINATED", "BY"): 1838 kwargs["lines"] = self._parse_string() 1839 if self._match_text_seq("NULL", "DEFINED", "AS"): 1840 kwargs["null"] = self._parse_string() 1841 1842 return self.expression(exp.RowFormatDelimitedProperty, **kwargs) # type: ignore 1843 1844 def _parse_load(self) -> exp.LoadData | exp.Command: 1845 if self._match_text_seq("DATA"): 1846 local = self._match_text_seq("LOCAL") 1847 self._match_text_seq("INPATH") 1848 inpath = self._parse_string() 1849 overwrite = self._match(TokenType.OVERWRITE) 1850 
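        # Hive syntax: LOAD DATA [LOCAL] INPATH ... [OVERWRITE] INTO TABLE t;
        # consume INTO TABLE before parsing the target table itself.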
self._match_pair(TokenType.INTO, TokenType.TABLE) 1851 1852 return self.expression( 1853 exp.LoadData, 1854 this=self._parse_table(schema=True), 1855 local=local, 1856 overwrite=overwrite, 1857 inpath=inpath, 1858 partition=self._parse_partition(), 1859 input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(), 1860 serde=self._match_text_seq("SERDE") and self._parse_string(), 1861 ) 1862 return self._parse_as_command(self._prev) 1863 1864 def _parse_delete(self) -> exp.Delete: 1865 # This handles MySQL's "Multiple-Table Syntax" 1866 # https://dev.mysql.com/doc/refman/8.0/en/delete.html 1867 tables = None 1868 comments = self._prev_comments 1869 if not self._match(TokenType.FROM, advance=False): 1870 tables = self._parse_csv(self._parse_table) or None 1871 1872 returning = self._parse_returning() 1873 1874 return self.expression( 1875 exp.Delete, 1876 comments=comments, 1877 tables=tables, 1878 this=self._match(TokenType.FROM) and self._parse_table(joins=True), 1879 using=self._match(TokenType.USING) and self._parse_table(joins=True), 1880 where=self._parse_where(), 1881 returning=returning or self._parse_returning(), 1882 limit=self._parse_limit(), 1883 ) 1884 1885 def _parse_update(self) -> exp.Update: 1886 comments = self._prev_comments 1887 this = self._parse_table(alias_tokens=self.UPDATE_ALIAS_TOKENS) 1888 expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality) 1889 returning = self._parse_returning() 1890 return self.expression( 1891 exp.Update, 1892 comments=comments, 1893 **{ # type: ignore 1894 "this": this, 1895 "expressions": expressions, 1896 "from": self._parse_from(joins=True), 1897 "where": self._parse_where(), 1898 "returning": returning or self._parse_returning(), 1899 "limit": self._parse_limit(), 1900 }, 1901 ) 1902 1903 def _parse_uncache(self) -> exp.Uncache: 1904 if not self._match(TokenType.TABLE): 1905 self.raise_error("Expecting TABLE after UNCACHE") 1906 1907 return self.expression( 1908 exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True) 1909 ) 1910 1911 def _parse_cache(self) -> exp.Cache: 1912 lazy = self._match_text_seq("LAZY") 1913 self._match(TokenType.TABLE) 1914 table = self._parse_table(schema=True) 1915 1916 options = [] 1917 if self._match_text_seq("OPTIONS"): 1918 self._match_l_paren() 1919 k = self._parse_string() 1920 self._match(TokenType.EQ) 1921 v = self._parse_string() 1922 options = [k, v] 1923 self._match_r_paren() 1924 1925 self._match(TokenType.ALIAS) 1926 return self.expression( 1927 exp.Cache, 1928 this=table, 1929 lazy=lazy, 1930 options=options, 1931 expression=self._parse_select(nested=True), 1932 ) 1933 1934 def _parse_partition(self) -> t.Optional[exp.Partition]: 1935 if not self._match(TokenType.PARTITION): 1936 return None 1937 1938 return self.expression( 1939 exp.Partition, expressions=self._parse_wrapped_csv(self._parse_conjunction) 1940 ) 1941 1942 def _parse_value(self) -> exp.Tuple: 1943 if self._match(TokenType.L_PAREN): 1944 expressions = self._parse_csv(self._parse_conjunction) 1945 self._match_r_paren() 1946 return self.expression(exp.Tuple, expressions=expressions) 1947 1948 # In presto we can have VALUES 1, 2 which results in 1 column & 2 rows. 
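        # e.g. VALUES 1, 2 is parsed as two one-element Tuples rather than one
        # two-element Tuple.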
1949 # https://prestodb.io/docs/current/sql/values.html 1950 return self.expression(exp.Tuple, expressions=[self._parse_conjunction()]) 1951 1952 def _parse_projections(self) -> t.List[t.Optional[exp.Expression]]: 1953 return self._parse_expressions() 1954 1955 def _parse_select( 1956 self, nested: bool = False, table: bool = False, parse_subquery_alias: bool = True 1957 ) -> t.Optional[exp.Expression]: 1958 cte = self._parse_with() 1959 if cte: 1960 this = self._parse_statement() 1961 1962 if not this: 1963 self.raise_error("Failed to parse any statement following CTE") 1964 return cte 1965 1966 if "with" in this.arg_types: 1967 this.set("with", cte) 1968 else: 1969 self.raise_error(f"{this.key} does not support CTE") 1970 this = cte 1971 elif self._match(TokenType.SELECT): 1972 comments = self._prev_comments 1973 1974 hint = self._parse_hint() 1975 all_ = self._match(TokenType.ALL) 1976 distinct = self._match(TokenType.DISTINCT) 1977 1978 kind = ( 1979 self._match(TokenType.ALIAS) 1980 and self._match_texts(("STRUCT", "VALUE")) 1981 and self._prev.text 1982 ) 1983 1984 if distinct: 1985 distinct = self.expression( 1986 exp.Distinct, 1987 on=self._parse_value() if self._match(TokenType.ON) else None, 1988 ) 1989 1990 if all_ and distinct: 1991 self.raise_error("Cannot specify both ALL and DISTINCT after SELECT") 1992 1993 limit = self._parse_limit(top=True) 1994 projections = self._parse_projections() 1995 1996 this = self.expression( 1997 exp.Select, 1998 kind=kind, 1999 hint=hint, 2000 distinct=distinct, 2001 expressions=projections, 2002 limit=limit, 2003 ) 2004 this.comments = comments 2005 2006 into = self._parse_into() 2007 if into: 2008 this.set("into", into) 2009 2010 from_ = self._parse_from() 2011 if from_: 2012 this.set("from", from_) 2013 2014 this = self._parse_query_modifiers(this) 2015 elif (table or nested) and self._match(TokenType.L_PAREN): 2016 if self._match(TokenType.PIVOT): 2017 this = self._parse_simplified_pivot() 2018 elif self._match(TokenType.FROM): 2019 this = exp.select("*").from_( 2020 t.cast(exp.From, self._parse_from(skip_from_token=True)) 2021 ) 2022 else: 2023 this = self._parse_table() if table else self._parse_select(nested=True) 2024 this = self._parse_set_operations(self._parse_query_modifiers(this)) 2025 2026 self._match_r_paren() 2027 2028 # We return early here so that the UNION isn't attached to the subquery by the 2029 # following call to _parse_set_operations, but instead becomes the parent node 2030 return self._parse_subquery(this, parse_alias=parse_subquery_alias) 2031 elif self._match(TokenType.VALUES): 2032 this = self.expression( 2033 exp.Values, 2034 expressions=self._parse_csv(self._parse_value), 2035 alias=self._parse_table_alias(), 2036 ) 2037 else: 2038 this = None 2039 2040 return self._parse_set_operations(this) 2041 2042 def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]: 2043 if not skip_with_token and not self._match(TokenType.WITH): 2044 return None 2045 2046 comments = self._prev_comments 2047 recursive = self._match(TokenType.RECURSIVE) 2048 2049 expressions = [] 2050 while True: 2051 expressions.append(self._parse_cte()) 2052 2053 if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH): 2054 break 2055 else: 2056 self._match(TokenType.WITH) 2057 2058 return self.expression( 2059 exp.With, comments=comments, expressions=expressions, recursive=recursive 2060 ) 2061 2062 def _parse_cte(self) -> exp.CTE: 2063 alias = self._parse_table_alias() 2064 if not alias or not alias.this: 2065 
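            # Every CTE must be named: WITH <alias> AS (<query>)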
self.raise_error("Expected CTE to have alias") 2066 2067 self._match(TokenType.ALIAS) 2068 return self.expression( 2069 exp.CTE, this=self._parse_wrapped(self._parse_statement), alias=alias 2070 ) 2071 2072 def _parse_table_alias( 2073 self, alias_tokens: t.Optional[t.Collection[TokenType]] = None 2074 ) -> t.Optional[exp.TableAlias]: 2075 any_token = self._match(TokenType.ALIAS) 2076 alias = ( 2077 self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 2078 or self._parse_string_as_identifier() 2079 ) 2080 2081 index = self._index 2082 if self._match(TokenType.L_PAREN): 2083 columns = self._parse_csv(self._parse_function_parameter) 2084 self._match_r_paren() if columns else self._retreat(index) 2085 else: 2086 columns = None 2087 2088 if not alias and not columns: 2089 return None 2090 2091 return self.expression(exp.TableAlias, this=alias, columns=columns) 2092 2093 def _parse_subquery( 2094 self, this: t.Optional[exp.Expression], parse_alias: bool = True 2095 ) -> t.Optional[exp.Subquery]: 2096 if not this: 2097 return None 2098 2099 return self.expression( 2100 exp.Subquery, 2101 this=this, 2102 pivots=self._parse_pivots(), 2103 alias=self._parse_table_alias() if parse_alias else None, 2104 ) 2105 2106 def _parse_query_modifiers( 2107 self, this: t.Optional[exp.Expression] 2108 ) -> t.Optional[exp.Expression]: 2109 if isinstance(this, self.MODIFIABLES): 2110 for join in iter(self._parse_join, None): 2111 this.append("joins", join) 2112 for lateral in iter(self._parse_lateral, None): 2113 this.append("laterals", lateral) 2114 2115 while True: 2116 if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False): 2117 parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type] 2118 key, expression = parser(self) 2119 2120 if expression: 2121 this.set(key, expression) 2122 if key == "limit": 2123 offset = expression.args.pop("offset", None) 2124 if offset: 2125 this.set("offset", exp.Offset(expression=offset)) 2126 continue 2127 break 2128 return this 2129 2130 def _parse_hint(self) -> t.Optional[exp.Hint]: 2131 if self._match(TokenType.HINT): 2132 hints = [] 2133 for hint in iter(lambda: self._parse_csv(self._parse_function), []): 2134 hints.extend(hint) 2135 2136 if not self._match_pair(TokenType.STAR, TokenType.SLASH): 2137 self.raise_error("Expected */ after HINT") 2138 2139 return self.expression(exp.Hint, expressions=hints) 2140 2141 return None 2142 2143 def _parse_into(self) -> t.Optional[exp.Into]: 2144 if not self._match(TokenType.INTO): 2145 return None 2146 2147 temp = self._match(TokenType.TEMPORARY) 2148 unlogged = self._match_text_seq("UNLOGGED") 2149 self._match(TokenType.TABLE) 2150 2151 return self.expression( 2152 exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged 2153 ) 2154 2155 def _parse_from( 2156 self, joins: bool = False, skip_from_token: bool = False 2157 ) -> t.Optional[exp.From]: 2158 if not skip_from_token and not self._match(TokenType.FROM): 2159 return None 2160 2161 return self.expression( 2162 exp.From, comments=self._prev_comments, this=self._parse_table(joins=joins) 2163 ) 2164 2165 def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]: 2166 if not self._match(TokenType.MATCH_RECOGNIZE): 2167 return None 2168 2169 self._match_l_paren() 2170 2171 partition = self._parse_partition_by() 2172 order = self._parse_order() 2173 measures = self._parse_expressions() if self._match_text_seq("MEASURES") else None 2174 2175 if self._match_text_seq("ONE", "ROW", "PER", "MATCH"): 2176 rows = 
exp.var("ONE ROW PER MATCH") 2177 elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"): 2178 text = "ALL ROWS PER MATCH" 2179 if self._match_text_seq("SHOW", "EMPTY", "MATCHES"): 2180 text += f" SHOW EMPTY MATCHES" 2181 elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"): 2182 text += f" OMIT EMPTY MATCHES" 2183 elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"): 2184 text += f" WITH UNMATCHED ROWS" 2185 rows = exp.var(text) 2186 else: 2187 rows = None 2188 2189 if self._match_text_seq("AFTER", "MATCH", "SKIP"): 2190 text = "AFTER MATCH SKIP" 2191 if self._match_text_seq("PAST", "LAST", "ROW"): 2192 text += f" PAST LAST ROW" 2193 elif self._match_text_seq("TO", "NEXT", "ROW"): 2194 text += f" TO NEXT ROW" 2195 elif self._match_text_seq("TO", "FIRST"): 2196 text += f" TO FIRST {self._advance_any().text}" # type: ignore 2197 elif self._match_text_seq("TO", "LAST"): 2198 text += f" TO LAST {self._advance_any().text}" # type: ignore 2199 after = exp.var(text) 2200 else: 2201 after = None 2202 2203 if self._match_text_seq("PATTERN"): 2204 self._match_l_paren() 2205 2206 if not self._curr: 2207 self.raise_error("Expecting )", self._curr) 2208 2209 paren = 1 2210 start = self._curr 2211 2212 while self._curr and paren > 0: 2213 if self._curr.token_type == TokenType.L_PAREN: 2214 paren += 1 2215 if self._curr.token_type == TokenType.R_PAREN: 2216 paren -= 1 2217 2218 end = self._prev 2219 self._advance() 2220 2221 if paren > 0: 2222 self.raise_error("Expecting )", self._curr) 2223 2224 pattern = exp.var(self._find_sql(start, end)) 2225 else: 2226 pattern = None 2227 2228 define = ( 2229 self._parse_csv( 2230 lambda: self.expression( 2231 exp.Alias, 2232 alias=self._parse_id_var(any_token=True), 2233 this=self._match(TokenType.ALIAS) and self._parse_conjunction(), 2234 ) 2235 ) 2236 if self._match_text_seq("DEFINE") 2237 else None 2238 ) 2239 2240 self._match_r_paren() 2241 2242 return self.expression( 2243 exp.MatchRecognize, 2244 partition_by=partition, 2245 order=order, 2246 measures=measures, 2247 rows=rows, 2248 after=after, 2249 pattern=pattern, 2250 define=define, 2251 alias=self._parse_table_alias(), 2252 ) 2253 2254 def _parse_lateral(self) -> t.Optional[exp.Lateral]: 2255 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY) 2256 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY) 2257 2258 if outer_apply or cross_apply: 2259 this = self._parse_select(table=True) 2260 view = None 2261 outer = not cross_apply 2262 elif self._match(TokenType.LATERAL): 2263 this = self._parse_select(table=True) 2264 view = self._match(TokenType.VIEW) 2265 outer = self._match(TokenType.OUTER) 2266 else: 2267 return None 2268 2269 if not this: 2270 this = ( 2271 self._parse_unnest() 2272 or self._parse_function() 2273 or self._parse_id_var(any_token=False) 2274 ) 2275 2276 while self._match(TokenType.DOT): 2277 this = exp.Dot( 2278 this=this, 2279 expression=self._parse_function() or self._parse_id_var(any_token=False), 2280 ) 2281 2282 if view: 2283 table = self._parse_id_var(any_token=False) 2284 columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else [] 2285 table_alias: t.Optional[exp.TableAlias] = self.expression( 2286 exp.TableAlias, this=table, columns=columns 2287 ) 2288 elif isinstance(this, exp.Subquery) and this.alias: 2289 # Ensures parity between the Subquery's and the Lateral's "alias" args 2290 table_alias = this.args["alias"].copy() 2291 else: 2292 table_alias = self._parse_table_alias() 2293 2294 return self.expression(exp.Lateral, 
this=this, view=view, outer=outer, alias=table_alias) 2295 2296 def _parse_join_parts( 2297 self, 2298 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 2299 return ( 2300 self._match_set(self.JOIN_METHODS) and self._prev, 2301 self._match_set(self.JOIN_SIDES) and self._prev, 2302 self._match_set(self.JOIN_KINDS) and self._prev, 2303 ) 2304 2305 def _parse_join( 2306 self, skip_join_token: bool = False, parse_bracket: bool = False 2307 ) -> t.Optional[exp.Join]: 2308 if self._match(TokenType.COMMA): 2309 return self.expression(exp.Join, this=self._parse_table()) 2310 2311 index = self._index 2312 method, side, kind = self._parse_join_parts() 2313 hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None 2314 join = self._match(TokenType.JOIN) 2315 2316 if not skip_join_token and not join: 2317 self._retreat(index) 2318 kind = None 2319 method = None 2320 side = None 2321 2322 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False) 2323 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False) 2324 2325 if not skip_join_token and not join and not outer_apply and not cross_apply: 2326 return None 2327 2328 if outer_apply: 2329 side = Token(TokenType.LEFT, "LEFT") 2330 2331 kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)} 2332 2333 if method: 2334 kwargs["method"] = method.text 2335 if side: 2336 kwargs["side"] = side.text 2337 if kind: 2338 kwargs["kind"] = kind.text 2339 if hint: 2340 kwargs["hint"] = hint 2341 2342 if self._match(TokenType.ON): 2343 kwargs["on"] = self._parse_conjunction() 2344 elif self._match(TokenType.USING): 2345 kwargs["using"] = self._parse_wrapped_id_vars() 2346 elif not (kind and kind.token_type == TokenType.CROSS): 2347 index = self._index 2348 joins = self._parse_joins() 2349 2350 if joins and self._match(TokenType.ON): 2351 kwargs["on"] = self._parse_conjunction() 2352 elif joins and self._match(TokenType.USING): 2353 kwargs["using"] = self._parse_wrapped_id_vars() 2354 else: 2355 joins = None 2356 self._retreat(index) 2357 2358 kwargs["this"].set("joins", joins) 2359 2360 comments = [c for token in (method, side, kind) if token for c in token.comments] 2361 return self.expression(exp.Join, comments=comments, **kwargs) 2362 2363 def _parse_index( 2364 self, 2365 index: t.Optional[exp.Expression] = None, 2366 ) -> t.Optional[exp.Index]: 2367 if index: 2368 unique = None 2369 primary = None 2370 amp = None 2371 2372 self._match(TokenType.ON) 2373 self._match(TokenType.TABLE) # hive 2374 table = self._parse_table_parts(schema=True) 2375 else: 2376 unique = self._match(TokenType.UNIQUE) 2377 primary = self._match_text_seq("PRIMARY") 2378 amp = self._match_text_seq("AMP") 2379 2380 if not self._match(TokenType.INDEX): 2381 return None 2382 2383 index = self._parse_id_var() 2384 table = None 2385 2386 using = self._parse_field() if self._match(TokenType.USING) else None 2387 2388 if self._match(TokenType.L_PAREN, advance=False): 2389 columns = self._parse_wrapped_csv(self._parse_ordered) 2390 else: 2391 columns = None 2392 2393 return self.expression( 2394 exp.Index, 2395 this=index, 2396 table=table, 2397 using=using, 2398 columns=columns, 2399 unique=unique, 2400 primary=primary, 2401 amp=amp, 2402 partition_by=self._parse_partition_by(), 2403 ) 2404 2405 def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]: 2406 hints: t.List[exp.Expression] = [] 2407 if self._match_pair(TokenType.WITH, TokenType.L_PAREN): 2408 # 
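# Example: _parse_join folds the method/side/kind tokens and the ON/USING
# clause into a single exp.Join; a bare comma between tables also yields an
# exp.Join with no kind (sketch, default dialect):
#
#     >>> import sqlglot
#     >>> from sqlglot import exp
#     >>> join = sqlglot.parse_one(
#     ...     "SELECT * FROM a LEFT JOIN b USING (id)"
#     ... ).find(exp.Join)
#     >>> join.args["side"], join.args["using"]  # "LEFT" and the id list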
https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16 2409 hints.append( 2410 self.expression( 2411 exp.WithTableHint, 2412 expressions=self._parse_csv( 2413 lambda: self._parse_function() or self._parse_var(any_token=True) 2414 ), 2415 ) 2416 ) 2417 self._match_r_paren() 2418 else: 2419 # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html 2420 while self._match_set(self.TABLE_INDEX_HINT_TOKENS): 2421 hint = exp.IndexTableHint(this=self._prev.text.upper()) 2422 2423 self._match_texts({"INDEX", "KEY"}) 2424 if self._match(TokenType.FOR): 2425 hint.set("target", self._advance_any() and self._prev.text.upper()) 2426 2427 hint.set("expressions", self._parse_wrapped_id_vars()) 2428 hints.append(hint) 2429 2430 return hints or None 2431 2432 def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]: 2433 return ( 2434 (not schema and self._parse_function(optional_parens=False)) 2435 or self._parse_id_var(any_token=False) 2436 or self._parse_string_as_identifier() 2437 or self._parse_placeholder() 2438 ) 2439 2440 def _parse_table_parts(self, schema: bool = False) -> exp.Table: 2441 catalog = None 2442 db = None 2443 table = self._parse_table_part(schema=schema) 2444 2445 while self._match(TokenType.DOT): 2446 if catalog: 2447 # This allows nesting the table in arbitrarily many dot expressions if needed 2448 table = self.expression( 2449 exp.Dot, this=table, expression=self._parse_table_part(schema=schema) 2450 ) 2451 else: 2452 catalog = db 2453 db = table 2454 table = self._parse_table_part(schema=schema) 2455 2456 if not table: 2457 self.raise_error(f"Expected table name but got {self._curr}") 2458 2459 return self.expression( 2460 exp.Table, this=table, db=db, catalog=catalog, pivots=self._parse_pivots() 2461 ) 2462 2463 def _parse_table( 2464 self, 2465 schema: bool = False, 2466 joins: bool = False, 2467 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 2468 parse_bracket: bool = False, 2469 ) -> t.Optional[exp.Expression]: 2470 lateral = self._parse_lateral() 2471 if lateral: 2472 return lateral 2473 2474 unnest = self._parse_unnest() 2475 if unnest: 2476 return unnest 2477 2478 values = self._parse_derived_table_values() 2479 if values: 2480 return values 2481 2482 subquery = self._parse_select(table=True) 2483 if subquery: 2484 if not subquery.args.get("pivots"): 2485 subquery.set("pivots", self._parse_pivots()) 2486 return subquery 2487 2488 bracket = parse_bracket and self._parse_bracket(None) 2489 bracket = self.expression(exp.Table, this=bracket) if bracket else None 2490 this: exp.Expression = bracket or self._parse_table_parts(schema=schema) 2491 2492 if schema: 2493 return self._parse_schema(this=this) 2494 2495 if self.ALIAS_POST_TABLESAMPLE: 2496 table_sample = self._parse_table_sample() 2497 2498 alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 2499 if alias: 2500 this.set("alias", alias) 2501 2502 if not this.args.get("pivots"): 2503 this.set("pivots", self._parse_pivots()) 2504 2505 this.set("hints", self._parse_table_hints()) 2506 2507 if not self.ALIAS_POST_TABLESAMPLE: 2508 table_sample = self._parse_table_sample() 2509 2510 if table_sample: 2511 table_sample.set("this", this) 2512 this = table_sample 2513 2514 if joins: 2515 for join in iter(self._parse_join, None): 2516 this.append("joins", join) 2517 2518 return this 2519 2520 def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]: 2521 if not self._match(TokenType.UNNEST): 2522 
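# Example: _parse_table_parts shifts the parsed parts leftward as each DOT is
# consumed, so the final part becomes the table and the earlier parts become
# db and catalog (sketch, default dialect):
#
#     >>> import sqlglot
#     >>> from sqlglot import exp
#     >>> table = sqlglot.parse_one("SELECT * FROM c.d.t").find(exp.Table)
#     >>> table.name, table.db, table.catalog  # 't', 'd', 'c'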
return None 2523 2524 expressions = self._parse_wrapped_csv(self._parse_type) 2525 ordinality = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 2526 2527 alias = self._parse_table_alias() if with_alias else None 2528 2529 if alias and self.UNNEST_COLUMN_ONLY: 2530 if alias.args.get("columns"): 2531 self.raise_error("Unexpected extra column alias in unnest.") 2532 2533 alias.set("columns", [alias.this]) 2534 alias.set("this", None) 2535 2536 offset = None 2537 if self._match_pair(TokenType.WITH, TokenType.OFFSET): 2538 self._match(TokenType.ALIAS) 2539 offset = self._parse_id_var() or exp.to_identifier("offset") 2540 2541 return self.expression( 2542 exp.Unnest, expressions=expressions, ordinality=ordinality, alias=alias, offset=offset 2543 ) 2544 2545 def _parse_derived_table_values(self) -> t.Optional[exp.Values]: 2546 is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES) 2547 if not is_derived and not self._match(TokenType.VALUES): 2548 return None 2549 2550 expressions = self._parse_csv(self._parse_value) 2551 alias = self._parse_table_alias() 2552 2553 if is_derived: 2554 self._match_r_paren() 2555 2556 return self.expression( 2557 exp.Values, expressions=expressions, alias=alias or self._parse_table_alias() 2558 ) 2559 2560 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 2561 if not self._match(TokenType.TABLE_SAMPLE) and not ( 2562 as_modifier and self._match_text_seq("USING", "SAMPLE") 2563 ): 2564 return None 2565 2566 bucket_numerator = None 2567 bucket_denominator = None 2568 bucket_field = None 2569 percent = None 2570 rows = None 2571 size = None 2572 seed = None 2573 2574 kind = ( 2575 self._prev.text if self._prev.token_type == TokenType.TABLE_SAMPLE else "USING SAMPLE" 2576 ) 2577 method = self._parse_var(tokens=(TokenType.ROW,)) 2578 2579 self._match(TokenType.L_PAREN) 2580 2581 num = self._parse_number() 2582 2583 if self._match_text_seq("BUCKET"): 2584 bucket_numerator = self._parse_number() 2585 self._match_text_seq("OUT", "OF") 2586 bucket_denominator = self._parse_number() 2587 self._match(TokenType.ON) 2588 bucket_field = self._parse_field() 2589 elif self._match_set((TokenType.PERCENT, TokenType.MOD)): 2590 percent = num 2591 elif self._match(TokenType.ROWS): 2592 rows = num 2593 else: 2594 size = num 2595 2596 self._match(TokenType.R_PAREN) 2597 2598 if self._match(TokenType.L_PAREN): 2599 method = self._parse_var() 2600 seed = self._match(TokenType.COMMA) and self._parse_number() 2601 self._match_r_paren() 2602 elif self._match_texts(("SEED", "REPEATABLE")): 2603 seed = self._parse_wrapped(self._parse_number) 2604 2605 return self.expression( 2606 exp.TableSample, 2607 method=method, 2608 bucket_numerator=bucket_numerator, 2609 bucket_denominator=bucket_denominator, 2610 bucket_field=bucket_field, 2611 percent=percent, 2612 rows=rows, 2613 size=size, 2614 seed=seed, 2615 kind=kind, 2616 ) 2617 2618 def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]: 2619 return list(iter(self._parse_pivot, None)) or None 2620 2621 def _parse_joins(self) -> t.Optional[t.List[exp.Join]]: 2622 return list(iter(self._parse_join, None)) or None 2623 2624 # https://duckdb.org/docs/sql/statements/pivot 2625 def _parse_simplified_pivot(self) -> exp.Pivot: 2626 def _parse_on() -> t.Optional[exp.Expression]: 2627 this = self._parse_bitwise() 2628 return self._parse_in(this) if self._match(TokenType.IN) else this 2629 2630 this = self._parse_table() 2631 expressions = self._match(TokenType.ON) and
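# Example: _parse_table_sample distinguishes percent, rows and size samples
# by the token that follows the parsed number (sketch; TABLESAMPLE support
# varies by dialect):
#
#     >>> import sqlglot
#     >>> from sqlglot import exp
#     >>> sample = sqlglot.parse_one(
#     ...     "SELECT * FROM t TABLESAMPLE (10 PERCENT)"
#     ... ).find(exp.TableSample)
#     >>> sample.args["percent"]  # the literal 10; "kind" is "TABLESAMPLE"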
self._parse_csv(_parse_on) 2632 using = self._match(TokenType.USING) and self._parse_csv( 2633 lambda: self._parse_alias(self._parse_function()) 2634 ) 2635 group = self._parse_group() 2636 return self.expression( 2637 exp.Pivot, this=this, expressions=expressions, using=using, group=group 2638 ) 2639 2640 def _parse_pivot(self) -> t.Optional[exp.Pivot]: 2641 index = self._index 2642 include_nulls = None 2643 2644 if self._match(TokenType.PIVOT): 2645 unpivot = False 2646 elif self._match(TokenType.UNPIVOT): 2647 unpivot = True 2648 2649 # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax 2650 if self._match_text_seq("INCLUDE", "NULLS"): 2651 include_nulls = True 2652 elif self._match_text_seq("EXCLUDE", "NULLS"): 2653 include_nulls = False 2654 else: 2655 return None 2656 2657 expressions = [] 2658 field = None 2659 2660 if not self._match(TokenType.L_PAREN): 2661 self._retreat(index) 2662 return None 2663 2664 if unpivot: 2665 expressions = self._parse_csv(self._parse_column) 2666 else: 2667 expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function())) 2668 2669 if not expressions: 2670 self.raise_error("Failed to parse PIVOT's aggregation list") 2671 2672 if not self._match(TokenType.FOR): 2673 self.raise_error("Expecting FOR") 2674 2675 value = self._parse_column() 2676 2677 if not self._match(TokenType.IN): 2678 self.raise_error("Expecting IN") 2679 2680 field = self._parse_in(value, alias=True) 2681 2682 self._match_r_paren() 2683 2684 pivot = self.expression( 2685 exp.Pivot, 2686 expressions=expressions, 2687 field=field, 2688 unpivot=unpivot, 2689 include_nulls=include_nulls, 2690 ) 2691 2692 if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False): 2693 pivot.set("alias", self._parse_table_alias()) 2694 2695 if not unpivot: 2696 names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions)) 2697 2698 columns: t.List[exp.Expression] = [] 2699 for fld in pivot.args["field"].expressions: 2700 field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name 2701 for name in names: 2702 if self.PREFIXED_PIVOT_COLUMNS: 2703 name = f"{name}_{field_name}" if name else field_name 2704 else: 2705 name = f"{field_name}_{name}" if name else field_name 2706 2707 columns.append(exp.to_identifier(name)) 2708 2709 pivot.set("columns", columns) 2710 2711 return pivot 2712 2713 def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]: 2714 return [agg.alias for agg in aggregations] 2715 2716 def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]: 2717 if not skip_where_token and not self._match(TokenType.WHERE): 2718 return None 2719 2720 return self.expression( 2721 exp.Where, comments=self._prev_comments, this=self._parse_conjunction() 2722 ) 2723 2724 def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]: 2725 if not skip_group_by_token and not self._match(TokenType.GROUP_BY): 2726 return None 2727 2728 elements = defaultdict(list) 2729 2730 if self._match(TokenType.ALL): 2731 return self.expression(exp.Group, all=True) 2732 2733 while True: 2734 expressions = self._parse_csv(self._parse_conjunction) 2735 if expressions: 2736 elements["expressions"].extend(expressions) 2737 2738 grouping_sets = self._parse_grouping_sets() 2739 if grouping_sets: 2740 elements["grouping_sets"].extend(grouping_sets) 2741 2742 rollup = None 2743 cube = None 2744 totals = None 2745 2746 with_ = self._match(TokenType.WITH) 2747 
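# Example: _parse_pivot parses the aggregation list, the FOR column and the
# IN list into an exp.Pivot that hangs off the pivoted table via "pivots"
# (sketch; shown with the Snowflake dialect):
#
#     >>> import sqlglot
#     >>> from sqlglot import exp
#     >>> ast = sqlglot.parse_one(
#     ...     "SELECT * FROM t PIVOT (SUM(v) FOR k IN ('a', 'b'))",
#     ...     read="snowflake",
#     ... )
#     >>> ast.find(exp.Pivot).args["field"]  # the parsed IN (...) expression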
if self._match(TokenType.ROLLUP): 2748 rollup = with_ or self._parse_wrapped_csv(self._parse_column) 2749 elements["rollup"].extend(ensure_list(rollup)) 2750 2751 if self._match(TokenType.CUBE): 2752 cube = with_ or self._parse_wrapped_csv(self._parse_column) 2753 elements["cube"].extend(ensure_list(cube)) 2754 2755 if self._match_text_seq("TOTALS"): 2756 totals = True 2757 elements["totals"] = True # type: ignore 2758 2759 if not (grouping_sets or rollup or cube or totals): 2760 break 2761 2762 return self.expression(exp.Group, **elements) # type: ignore 2763 2764 def _parse_grouping_sets(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]: 2765 if not self._match(TokenType.GROUPING_SETS): 2766 return None 2767 2768 return self._parse_wrapped_csv(self._parse_grouping_set) 2769 2770 def _parse_grouping_set(self) -> t.Optional[exp.Expression]: 2771 if self._match(TokenType.L_PAREN): 2772 grouping_set = self._parse_csv(self._parse_column) 2773 self._match_r_paren() 2774 return self.expression(exp.Tuple, expressions=grouping_set) 2775 2776 return self._parse_column() 2777 2778 def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]: 2779 if not skip_having_token and not self._match(TokenType.HAVING): 2780 return None 2781 return self.expression(exp.Having, this=self._parse_conjunction()) 2782 2783 def _parse_qualify(self) -> t.Optional[exp.Qualify]: 2784 if not self._match(TokenType.QUALIFY): 2785 return None 2786 return self.expression(exp.Qualify, this=self._parse_conjunction()) 2787 2788 def _parse_order( 2789 self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False 2790 ) -> t.Optional[exp.Expression]: 2791 if not skip_order_token and not self._match(TokenType.ORDER_BY): 2792 return this 2793 2794 return self.expression( 2795 exp.Order, this=this, expressions=self._parse_csv(self._parse_ordered) 2796 ) 2797 2798 def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]: 2799 if not self._match(token): 2800 return None 2801 return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered)) 2802 2803 def _parse_ordered(self) -> exp.Ordered: 2804 this = self._parse_conjunction() 2805 self._match(TokenType.ASC) 2806 2807 is_desc = self._match(TokenType.DESC) 2808 is_nulls_first = self._match_text_seq("NULLS", "FIRST") 2809 is_nulls_last = self._match_text_seq("NULLS", "LAST") 2810 desc = is_desc or False 2811 asc = not desc 2812 nulls_first = is_nulls_first or False 2813 explicitly_null_ordered = is_nulls_first or is_nulls_last 2814 2815 if ( 2816 not explicitly_null_ordered 2817 and ( 2818 (asc and self.NULL_ORDERING == "nulls_are_small") 2819 or (desc and self.NULL_ORDERING != "nulls_are_small") 2820 ) 2821 and self.NULL_ORDERING != "nulls_are_last" 2822 ): 2823 nulls_first = True 2824 2825 return self.expression(exp.Ordered, this=this, desc=desc, nulls_first=nulls_first) 2826 2827 def _parse_limit( 2828 self, this: t.Optional[exp.Expression] = None, top: bool = False 2829 ) -> t.Optional[exp.Expression]: 2830 if self._match(TokenType.TOP if top else TokenType.LIMIT): 2831 comments = self._prev_comments 2832 if top: 2833 limit_paren = self._match(TokenType.L_PAREN) 2834 expression = self._parse_number() 2835 2836 if limit_paren: 2837 self._match_r_paren() 2838 else: 2839 expression = self._parse_term() 2840 2841 if self._match(TokenType.COMMA): 2842 offset = expression 2843 expression = self._parse_term() 2844 else: 2845 offset = None 2846 2847 limit_exp = self.expression( 2848 exp.Limit, this=this, 
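# Example: _parse_group accepts both the function-style ROLLUP (...) and the
# MySQL WITH ROLLUP spellings; the former collects the wrapped columns, the
# latter stores a bare True (sketch, default dialect):
#
#     >>> import sqlglot
#     >>> from sqlglot import exp
#     >>> group = sqlglot.parse_one(
#     ...     "SELECT a, SUM(b) FROM t GROUP BY ROLLUP (a)"
#     ... ).find(exp.Group)
#     >>> group.args["rollup"]  # the list of rolled-up columns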
expression=expression, offset=offset, comments=comments 2849 ) 2850 2851 return limit_exp 2852 2853 if self._match(TokenType.FETCH): 2854 direction = self._match_set((TokenType.FIRST, TokenType.NEXT)) 2855 direction = self._prev.text if direction else "FIRST" 2856 2857 count = self._parse_number() 2858 percent = self._match(TokenType.PERCENT) 2859 2860 self._match_set((TokenType.ROW, TokenType.ROWS)) 2861 2862 only = self._match_text_seq("ONLY") 2863 with_ties = self._match_text_seq("WITH", "TIES") 2864 2865 if only and with_ties: 2866 self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause") 2867 2868 return self.expression( 2869 exp.Fetch, 2870 direction=direction, 2871 count=count, 2872 percent=percent, 2873 with_ties=with_ties, 2874 ) 2875 2876 return this 2877 2878 def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 2879 if not self._match(TokenType.OFFSET): 2880 return this 2881 2882 count = self._parse_term() 2883 self._match_set((TokenType.ROW, TokenType.ROWS)) 2884 return self.expression(exp.Offset, this=this, expression=count) 2885 2886 def _parse_locks(self) -> t.List[exp.Lock]: 2887 locks = [] 2888 while True: 2889 if self._match_text_seq("FOR", "UPDATE"): 2890 update = True 2891 elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq( 2892 "LOCK", "IN", "SHARE", "MODE" 2893 ): 2894 update = False 2895 else: 2896 break 2897 2898 expressions = None 2899 if self._match_text_seq("OF"): 2900 expressions = self._parse_csv(lambda: self._parse_table(schema=True)) 2901 2902 wait: t.Optional[bool | exp.Expression] = None 2903 if self._match_text_seq("NOWAIT"): 2904 wait = True 2905 elif self._match_text_seq("WAIT"): 2906 wait = self._parse_primary() 2907 elif self._match_text_seq("SKIP", "LOCKED"): 2908 wait = False 2909 2910 locks.append( 2911 self.expression(exp.Lock, update=update, expressions=expressions, wait=wait) 2912 ) 2913 2914 return locks 2915 2916 def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 2917 if not self._match_set(self.SET_OPERATIONS): 2918 return this 2919 2920 token_type = self._prev.token_type 2921 2922 if token_type == TokenType.UNION: 2923 expression = exp.Union 2924 elif token_type == TokenType.EXCEPT: 2925 expression = exp.Except 2926 else: 2927 expression = exp.Intersect 2928 2929 return self.expression( 2930 expression, 2931 this=this, 2932 distinct=self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL), 2933 expression=self._parse_set_operations(self._parse_select(nested=True)), 2934 ) 2935 2936 def _parse_expression(self) -> t.Optional[exp.Expression]: 2937 return self._parse_alias(self._parse_conjunction()) 2938 2939 def _parse_conjunction(self) -> t.Optional[exp.Expression]: 2940 return self._parse_tokens(self._parse_equality, self.CONJUNCTION) 2941 2942 def _parse_equality(self) -> t.Optional[exp.Expression]: 2943 return self._parse_tokens(self._parse_comparison, self.EQUALITY) 2944 2945 def _parse_comparison(self) -> t.Optional[exp.Expression]: 2946 return self._parse_tokens(self._parse_range, self.COMPARISON) 2947 2948 def _parse_range(self) -> t.Optional[exp.Expression]: 2949 this = self._parse_bitwise() 2950 negate = self._match(TokenType.NOT) 2951 2952 if self._match_set(self.RANGE_PARSERS): 2953 expression = self.RANGE_PARSERS[self._prev.token_type](self, this) 2954 if not expression: 2955 return this 2956 2957 this = expression 2958 elif self._match(TokenType.ISNULL): 2959 this = self.expression(exp.Is, this=this, 
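# Example: _parse_set_operations recurses on the right-hand side, so chained
# UNIONs nest to the right, and "distinct" is True unless ALL was matched
# (sketch, default dialect):
#
#     >>> import sqlglot
#     >>> from sqlglot import exp
#     >>> u = sqlglot.parse_one("SELECT a FROM x UNION ALL SELECT a FROM y")
#     >>> isinstance(u, exp.Union), u.args["distinct"]  # True, False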
expression=exp.Null()) 2960 2961 # Postgres supports ISNULL and NOTNULL for conditions. 2962 # https://blog.andreiavram.ro/postgresql-null-composite-type/ 2963 if self._match(TokenType.NOTNULL): 2964 this = self.expression(exp.Is, this=this, expression=exp.Null()) 2965 this = self.expression(exp.Not, this=this) 2966 2967 if negate: 2968 this = self.expression(exp.Not, this=this) 2969 2970 if self._match(TokenType.IS): 2971 this = self._parse_is(this) 2972 2973 return this 2974 2975 def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 2976 index = self._index - 1 2977 negate = self._match(TokenType.NOT) 2978 2979 if self._match_text_seq("DISTINCT", "FROM"): 2980 klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ 2981 return self.expression(klass, this=this, expression=self._parse_expression()) 2982 2983 expression = self._parse_null() or self._parse_boolean() 2984 if not expression: 2985 self._retreat(index) 2986 return None 2987 2988 this = self.expression(exp.Is, this=this, expression=expression) 2989 return self.expression(exp.Not, this=this) if negate else this 2990 2991 def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In: 2992 unnest = self._parse_unnest(with_alias=False) 2993 if unnest: 2994 this = self.expression(exp.In, this=this, unnest=unnest) 2995 elif self._match(TokenType.L_PAREN): 2996 expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias)) 2997 2998 if len(expressions) == 1 and isinstance(expressions[0], exp.Subqueryable): 2999 this = self.expression(exp.In, this=this, query=expressions[0]) 3000 else: 3001 this = self.expression(exp.In, this=this, expressions=expressions) 3002 3003 self._match_r_paren(this) 3004 else: 3005 this = self.expression(exp.In, this=this, field=self._parse_field()) 3006 3007 return this 3008 3009 def _parse_between(self, this: exp.Expression) -> exp.Between: 3010 low = self._parse_bitwise() 3011 self._match(TokenType.AND) 3012 high = self._parse_bitwise() 3013 return self.expression(exp.Between, this=this, low=low, high=high) 3014 3015 def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3016 if not self._match(TokenType.ESCAPE): 3017 return this 3018 return self.expression(exp.Escape, this=this, expression=self._parse_string()) 3019 3020 def _parse_interval(self) -> t.Optional[exp.Interval]: 3021 if not self._match(TokenType.INTERVAL): 3022 return None 3023 3024 if self._match(TokenType.STRING, advance=False): 3025 this = self._parse_primary() 3026 else: 3027 this = self._parse_term() 3028 3029 unit = self._parse_function() or self._parse_var() 3030 3031 # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse 3032 # each INTERVAL expression into this canonical form so it's easy to transpile 3033 if this and this.is_number: 3034 this = exp.Literal.string(this.name) 3035 elif this and this.is_string: 3036 parts = this.name.split() 3037 3038 if len(parts) == 2: 3039 if unit: 3040 # this is not actually a unit, it's something else 3041 unit = None 3042 self._retreat(self._index - 1) 3043 else: 3044 this = exp.Literal.string(parts[0]) 3045 unit = self.expression(exp.Var, this=parts[1]) 3046 3047 return self.expression(exp.Interval, this=this, unit=unit) 3048 3049 def _parse_bitwise(self) -> t.Optional[exp.Expression]: 3050 this = self._parse_term() 3051 3052 while True: 3053 if self._match_set(self.BITWISE): 3054 this = self.expression( 3055 self.BITWISE[self._prev.token_type], 3056 this=this, 3057 
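# Example: _parse_interval rewrites INTERVAL '5 day' into the canonical
# INTERVAL '5' day shape, splitting the quoted string into a number literal
# and a unit var (sketch, default dialect):
#
#     >>> import sqlglot
#     >>> from sqlglot import exp
#     >>> iv = sqlglot.parse_one("SELECT INTERVAL '5 day'").find(exp.Interval)
#     >>> iv.this, iv.args["unit"]  # Literal '5' and Var 'day'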
expression=self._parse_term(), 3058 ) 3059 elif self._match(TokenType.DQMARK): 3060 this = self.expression(exp.Coalesce, this=this, expressions=self._parse_term()) 3061 elif self._match_pair(TokenType.LT, TokenType.LT): 3062 this = self.expression( 3063 exp.BitwiseLeftShift, this=this, expression=self._parse_term() 3064 ) 3065 elif self._match_pair(TokenType.GT, TokenType.GT): 3066 this = self.expression( 3067 exp.BitwiseRightShift, this=this, expression=self._parse_term() 3068 ) 3069 else: 3070 break 3071 3072 return this 3073 3074 def _parse_term(self) -> t.Optional[exp.Expression]: 3075 return self._parse_tokens(self._parse_factor, self.TERM) 3076 3077 def _parse_factor(self) -> t.Optional[exp.Expression]: 3078 return self._parse_tokens(self._parse_unary, self.FACTOR) 3079 3080 def _parse_unary(self) -> t.Optional[exp.Expression]: 3081 if self._match_set(self.UNARY_PARSERS): 3082 return self.UNARY_PARSERS[self._prev.token_type](self) 3083 return self._parse_at_time_zone(self._parse_type()) 3084 3085 def _parse_type(self) -> t.Optional[exp.Expression]: 3086 interval = self._parse_interval() 3087 if interval: 3088 return interval 3089 3090 index = self._index 3091 data_type = self._parse_types(check_func=True) 3092 this = self._parse_column() 3093 3094 if data_type: 3095 if isinstance(this, exp.Literal): 3096 parser = self.TYPE_LITERAL_PARSERS.get(data_type.this) 3097 if parser: 3098 return parser(self, this, data_type) 3099 return self.expression(exp.Cast, this=this, to=data_type) 3100 if not data_type.expressions: 3101 self._retreat(index) 3102 return self._parse_column() 3103 return self._parse_column_ops(data_type) 3104 3105 return this 3106 3107 def _parse_type_size(self) -> t.Optional[exp.DataTypeSize]: 3108 this = self._parse_type() 3109 if not this: 3110 return None 3111 3112 return self.expression( 3113 exp.DataTypeSize, this=this, expression=self._parse_var(any_token=True) 3114 ) 3115 3116 def _parse_types( 3117 self, check_func: bool = False, schema: bool = False 3118 ) -> t.Optional[exp.Expression]: 3119 index = self._index 3120 3121 prefix = self._match_text_seq("SYSUDTLIB", ".") 3122 3123 if not self._match_set(self.TYPE_TOKENS): 3124 return None 3125 3126 type_token = self._prev.token_type 3127 3128 if type_token == TokenType.PSEUDO_TYPE: 3129 return self.expression(exp.PseudoType, this=self._prev.text) 3130 3131 nested = type_token in self.NESTED_TYPE_TOKENS 3132 is_struct = type_token in self.STRUCT_TYPE_TOKENS 3133 expressions = None 3134 maybe_func = False 3135 3136 if self._match(TokenType.L_PAREN): 3137 if is_struct: 3138 expressions = self._parse_csv(self._parse_struct_types) 3139 elif nested: 3140 expressions = self._parse_csv( 3141 lambda: self._parse_types(check_func=check_func, schema=schema) 3142 ) 3143 elif type_token in self.ENUM_TYPE_TOKENS: 3144 expressions = self._parse_csv(self._parse_equality) 3145 else: 3146 expressions = self._parse_csv(self._parse_type_size) 3147 3148 if not expressions or not self._match(TokenType.R_PAREN): 3149 self._retreat(index) 3150 return None 3151 3152 maybe_func = True 3153 3154 this: t.Optional[exp.Expression] = None 3155 values: t.Optional[t.List[t.Optional[exp.Expression]]] = None 3156 3157 if nested and self._match(TokenType.LT): 3158 if is_struct: 3159 expressions = self._parse_csv(self._parse_struct_types) 3160 else: 3161 expressions = self._parse_csv( 3162 lambda: self._parse_types(check_func=check_func, schema=schema) 3163 ) 3164 3165 if not self._match(TokenType.GT): 3166 self.raise_error("Expecting >") 3167 3168 if 
self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)): 3169 values = self._parse_csv(self._parse_conjunction) 3170 self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN)) 3171 3172 if type_token in self.TIMESTAMPS: 3173 if self._match_text_seq("WITH", "TIME", "ZONE"): 3174 maybe_func = False 3175 tz_type = ( 3176 exp.DataType.Type.TIMETZ 3177 if type_token in self.TIMES 3178 else exp.DataType.Type.TIMESTAMPTZ 3179 ) 3180 this = exp.DataType(this=tz_type, expressions=expressions) 3181 elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"): 3182 maybe_func = False 3183 this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions) 3184 elif self._match_text_seq("WITHOUT", "TIME", "ZONE"): 3185 maybe_func = False 3186 elif type_token == TokenType.INTERVAL: 3187 if self._match_text_seq("YEAR", "TO", "MONTH"): 3188 span: t.Optional[t.List[exp.Expression]] = [exp.IntervalYearToMonthSpan()] 3189 elif self._match_text_seq("DAY", "TO", "SECOND"): 3190 span = [exp.IntervalDayToSecondSpan()] 3191 else: 3192 span = None 3193 3194 unit = not span and self._parse_var() 3195 if not unit: 3196 this = self.expression( 3197 exp.DataType, this=exp.DataType.Type.INTERVAL, expressions=span 3198 ) 3199 else: 3200 this = self.expression(exp.Interval, unit=unit) 3201 3202 if maybe_func and check_func: 3203 index2 = self._index 3204 peek = self._parse_string() 3205 3206 if not peek: 3207 self._retreat(index) 3208 return None 3209 3210 self._retreat(index2) 3211 3212 if not this: 3213 this = exp.DataType( 3214 this=exp.DataType.Type[type_token.value], 3215 expressions=expressions, 3216 nested=nested, 3217 values=values, 3218 prefix=prefix, 3219 ) 3220 3221 while self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET): 3222 this = exp.DataType(this=exp.DataType.Type.ARRAY, expressions=[this], nested=True) 3223 3224 return this 3225 3226 def _parse_struct_types(self) -> t.Optional[exp.Expression]: 3227 this = self._parse_type() or self._parse_id_var() 3228 self._match(TokenType.COLON) 3229 return self._parse_column_def(this) 3230 3231 def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3232 if not self._match_text_seq("AT", "TIME", "ZONE"): 3233 return this 3234 return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary()) 3235 3236 def _parse_column(self) -> t.Optional[exp.Expression]: 3237 this = self._parse_field() 3238 if isinstance(this, exp.Identifier): 3239 this = self.expression(exp.Column, this=this) 3240 elif not this: 3241 return self._parse_bracket(this) 3242 return self._parse_column_ops(this) 3243 3244 def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3245 this = self._parse_bracket(this) 3246 3247 while self._match_set(self.COLUMN_OPERATORS): 3248 op_token = self._prev.token_type 3249 op = self.COLUMN_OPERATORS.get(op_token) 3250 3251 if op_token == TokenType.DCOLON: 3252 field = self._parse_types() 3253 if not field: 3254 self.raise_error("Expected type") 3255 elif op and self._curr: 3256 self._advance() 3257 value = self._prev.text 3258 field = ( 3259 exp.Literal.number(value) 3260 if self._prev.token_type == TokenType.NUMBER 3261 else exp.Literal.string(value) 3262 ) 3263 else: 3264 field = self._parse_field(anonymous_func=True, any_token=True) 3265 3266 if isinstance(field, exp.Func): 3267 # bigquery allows function calls like x.y.count(...) 3268 # SAFE.SUBSTR(...) 
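# Example: in _parse_column_ops a DCOLON forces a type parse, and the
# corresponding COLUMN_OPERATORS entry (defined earlier in the class) builds
# a cast from it (sketch, default dialect):
#
#     >>> import sqlglot
#     >>> from sqlglot import exp
#     >>> cast = sqlglot.parse_one("SELECT a::INT").find(exp.Cast)
#     >>> cast.to  # the parsed exp.DataType for INT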
3269 # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules 3270 this = self._replace_columns_with_dots(this) 3271 3272 if op: 3273 this = op(self, this, field) 3274 elif isinstance(this, exp.Column) and not this.args.get("catalog"): 3275 this = self.expression( 3276 exp.Column, 3277 this=field, 3278 table=this.this, 3279 db=this.args.get("table"), 3280 catalog=this.args.get("db"), 3281 ) 3282 else: 3283 this = self.expression(exp.Dot, this=this, expression=field) 3284 this = self._parse_bracket(this) 3285 return this 3286 3287 def _parse_primary(self) -> t.Optional[exp.Expression]: 3288 if self._match_set(self.PRIMARY_PARSERS): 3289 token_type = self._prev.token_type 3290 primary = self.PRIMARY_PARSERS[token_type](self, self._prev) 3291 3292 if token_type == TokenType.STRING: 3293 expressions = [primary] 3294 while self._match(TokenType.STRING): 3295 expressions.append(exp.Literal.string(self._prev.text)) 3296 3297 if len(expressions) > 1: 3298 return self.expression(exp.Concat, expressions=expressions) 3299 3300 return primary 3301 3302 if self._match_pair(TokenType.DOT, TokenType.NUMBER): 3303 return exp.Literal.number(f"0.{self._prev.text}") 3304 3305 if self._match(TokenType.L_PAREN): 3306 comments = self._prev_comments 3307 query = self._parse_select() 3308 3309 if query: 3310 expressions = [query] 3311 else: 3312 expressions = self._parse_expressions() 3313 3314 this = self._parse_query_modifiers(seq_get(expressions, 0)) 3315 3316 if isinstance(this, exp.Subqueryable): 3317 this = self._parse_set_operations( 3318 self._parse_subquery(this=this, parse_alias=False) 3319 ) 3320 elif len(expressions) > 1: 3321 this = self.expression(exp.Tuple, expressions=expressions) 3322 else: 3323 this = self.expression(exp.Paren, this=self._parse_set_operations(this)) 3324 3325 if this: 3326 this.add_comments(comments) 3327 3328 self._match_r_paren(expression=this) 3329 return this 3330 3331 return None 3332 3333 def _parse_field( 3334 self, 3335 any_token: bool = False, 3336 tokens: t.Optional[t.Collection[TokenType]] = None, 3337 anonymous_func: bool = False, 3338 ) -> t.Optional[exp.Expression]: 3339 return ( 3340 self._parse_primary() 3341 or self._parse_function(anonymous=anonymous_func) 3342 or self._parse_id_var(any_token=any_token, tokens=tokens) 3343 ) 3344 3345 def _parse_function( 3346 self, 3347 functions: t.Optional[t.Dict[str, t.Callable]] = None, 3348 anonymous: bool = False, 3349 optional_parens: bool = True, 3350 ) -> t.Optional[exp.Expression]: 3351 if not self._curr: 3352 return None 3353 3354 token_type = self._curr.token_type 3355 this = self._curr.text 3356 upper = this.upper() 3357 3358 parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper) 3359 if optional_parens and parser: 3360 self._advance() 3361 return parser(self) 3362 3363 if not self._next or self._next.token_type != TokenType.L_PAREN: 3364 if optional_parens and token_type in self.NO_PAREN_FUNCTIONS: 3365 self._advance() 3366 return self.expression(self.NO_PAREN_FUNCTIONS[token_type]) 3367 3368 return None 3369 3370 if token_type not in self.FUNC_TOKENS: 3371 return None 3372 3373 self._advance(2) 3374 3375 parser = self.FUNCTION_PARSERS.get(upper) 3376 if parser and not anonymous: 3377 this = parser(self) 3378 else: 3379 subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type) 3380 3381 if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH): 3382 this = self.expression(subquery_predicate, this=self._parse_select()) 3383 
self._match_r_paren() 3384 return this 3385 3386 if functions is None: 3387 functions = self.FUNCTIONS 3388 3389 function = functions.get(upper) 3390 3391 alias = upper in self.FUNCTIONS_WITH_ALIASED_ARGS 3392 args = self._parse_csv(lambda: self._parse_lambda(alias=alias)) 3393 3394 if function and not anonymous: 3395 func = self.validate_expression(function(args), args) 3396 if not self.NORMALIZE_FUNCTIONS: 3397 func.meta["name"] = this 3398 this = func 3399 else: 3400 this = self.expression(exp.Anonymous, this=this, expressions=args) 3401 3402 self._match_r_paren(this) 3403 return self._parse_window(this) 3404 3405 def _parse_function_parameter(self) -> t.Optional[exp.Expression]: 3406 return self._parse_column_def(self._parse_id_var()) 3407 3408 def _parse_user_defined_function( 3409 self, kind: t.Optional[TokenType] = None 3410 ) -> t.Optional[exp.Expression]: 3411 this = self._parse_id_var() 3412 3413 while self._match(TokenType.DOT): 3414 this = self.expression(exp.Dot, this=this, expression=self._parse_id_var()) 3415 3416 if not self._match(TokenType.L_PAREN): 3417 return this 3418 3419 expressions = self._parse_csv(self._parse_function_parameter) 3420 self._match_r_paren() 3421 return self.expression( 3422 exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True 3423 ) 3424 3425 def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier: 3426 literal = self._parse_primary() 3427 if literal: 3428 return self.expression(exp.Introducer, this=token.text, expression=literal) 3429 3430 return self.expression(exp.Identifier, this=token.text) 3431 3432 def _parse_session_parameter(self) -> exp.SessionParameter: 3433 kind = None 3434 this = self._parse_id_var() or self._parse_primary() 3435 3436 if this and self._match(TokenType.DOT): 3437 kind = this.name 3438 this = self._parse_var() or self._parse_primary() 3439 3440 return self.expression(exp.SessionParameter, this=this, kind=kind) 3441 3442 def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]: 3443 index = self._index 3444 3445 if self._match(TokenType.L_PAREN): 3446 expressions = self._parse_csv(self._parse_id_var) 3447 3448 if not self._match(TokenType.R_PAREN): 3449 self._retreat(index) 3450 else: 3451 expressions = [self._parse_id_var()] 3452 3453 if self._match_set(self.LAMBDAS): 3454 return self.LAMBDAS[self._prev.token_type](self, expressions) 3455 3456 self._retreat(index) 3457 3458 this: t.Optional[exp.Expression] 3459 3460 if self._match(TokenType.DISTINCT): 3461 this = self.expression( 3462 exp.Distinct, expressions=self._parse_csv(self._parse_conjunction) 3463 ) 3464 else: 3465 this = self._parse_select_or_expression(alias=alias) 3466 3467 return self._parse_limit(self._parse_order(self._parse_respect_or_ignore_nulls(this))) 3468 3469 def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 3470 index = self._index 3471 3472 if not self.errors: 3473 try: 3474 if self._parse_select(nested=True): 3475 return this 3476 except ParseError: 3477 pass 3478 finally: 3479 self.errors.clear() 3480 self._retreat(index) 3481 3482 if not self._match(TokenType.L_PAREN): 3483 return this 3484 3485 args = self._parse_csv( 3486 lambda: self._parse_constraint() 3487 or self._parse_column_def(self._parse_field(any_token=True)) 3488 ) 3489 3490 self._match_r_paren() 3491 return self.expression(exp.Schema, this=this, expressions=args) 3492 3493 def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3494 # column 
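# Example: _parse_function falls back to exp.Anonymous when the name is not
# in the FUNCTIONS registry, so unknown calls round-trip untouched (sketch,
# default dialect; MY_UDF is a hypothetical, unregistered name):
#
#     >>> import sqlglot
#     >>> from sqlglot import exp
#     >>> fn = sqlglot.parse_one("SELECT MY_UDF(1, x)").find(exp.Anonymous)
#     >>> fn.this, fn.expressions  # 'MY_UDF' and its parsed arguments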
defs are not really columns, they're identifiers 3495 if isinstance(this, exp.Column): 3496 this = this.this 3497 3498 kind = self._parse_types(schema=True) 3499 3500 if self._match_text_seq("FOR", "ORDINALITY"): 3501 return self.expression(exp.ColumnDef, this=this, ordinality=True) 3502 3503 constraints = [] 3504 while True: 3505 constraint = self._parse_column_constraint() 3506 if not constraint: 3507 break 3508 constraints.append(constraint) 3509 3510 if not kind and not constraints: 3511 return this 3512 3513 return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints) 3514 3515 def _parse_auto_increment( 3516 self, 3517 ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint: 3518 start = None 3519 increment = None 3520 3521 if self._match(TokenType.L_PAREN, advance=False): 3522 args = self._parse_wrapped_csv(self._parse_bitwise) 3523 start = seq_get(args, 0) 3524 increment = seq_get(args, 1) 3525 elif self._match_text_seq("START"): 3526 start = self._parse_bitwise() 3527 self._match_text_seq("INCREMENT") 3528 increment = self._parse_bitwise() 3529 3530 if start and increment: 3531 return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment) 3532 3533 return exp.AutoIncrementColumnConstraint() 3534 3535 def _parse_compress(self) -> exp.CompressColumnConstraint: 3536 if self._match(TokenType.L_PAREN, advance=False): 3537 return self.expression( 3538 exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise) 3539 ) 3540 3541 return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise()) 3542 3543 def _parse_generated_as_identity(self) -> exp.GeneratedAsIdentityColumnConstraint: 3544 if self._match_text_seq("BY", "DEFAULT"): 3545 on_null = self._match_pair(TokenType.ON, TokenType.NULL) 3546 this = self.expression( 3547 exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null 3548 ) 3549 else: 3550 self._match_text_seq("ALWAYS") 3551 this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True) 3552 3553 self._match(TokenType.ALIAS) 3554 identity = self._match_text_seq("IDENTITY") 3555 3556 if self._match(TokenType.L_PAREN): 3557 if self._match_text_seq("START", "WITH"): 3558 this.set("start", self._parse_bitwise()) 3559 if self._match_text_seq("INCREMENT", "BY"): 3560 this.set("increment", self._parse_bitwise()) 3561 if self._match_text_seq("MINVALUE"): 3562 this.set("minvalue", self._parse_bitwise()) 3563 if self._match_text_seq("MAXVALUE"): 3564 this.set("maxvalue", self._parse_bitwise()) 3565 3566 if self._match_text_seq("CYCLE"): 3567 this.set("cycle", True) 3568 elif self._match_text_seq("NO", "CYCLE"): 3569 this.set("cycle", False) 3570 3571 if not identity: 3572 this.set("expression", self._parse_bitwise()) 3573 3574 self._match_r_paren() 3575 3576 return this 3577 3578 def _parse_inline(self) -> exp.InlineLengthColumnConstraint: 3579 self._match_text_seq("LENGTH") 3580 return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise()) 3581 3582 def _parse_not_constraint( 3583 self, 3584 ) -> t.Optional[exp.NotNullColumnConstraint | exp.CaseSpecificColumnConstraint]: 3585 if self._match_text_seq("NULL"): 3586 return self.expression(exp.NotNullColumnConstraint) 3587 if self._match_text_seq("CASESPECIFIC"): 3588 return self.expression(exp.CaseSpecificColumnConstraint, not_=True) 3589 return None 3590 3591 def _parse_column_constraint(self) -> t.Optional[exp.Expression]: 3592 if self._match(TokenType.CONSTRAINT): 3593 this = 
self._parse_id_var() 3594 else: 3595 this = None 3596 3597 if self._match_texts(self.CONSTRAINT_PARSERS): 3598 return self.expression( 3599 exp.ColumnConstraint, 3600 this=this, 3601 kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self), 3602 ) 3603 3604 return this 3605 3606 def _parse_constraint(self) -> t.Optional[exp.Expression]: 3607 if not self._match(TokenType.CONSTRAINT): 3608 return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS) 3609 3610 this = self._parse_id_var() 3611 expressions = [] 3612 3613 while True: 3614 constraint = self._parse_unnamed_constraint() or self._parse_function() 3615 if not constraint: 3616 break 3617 expressions.append(constraint) 3618 3619 return self.expression(exp.Constraint, this=this, expressions=expressions) 3620 3621 def _parse_unnamed_constraint( 3622 self, constraints: t.Optional[t.Collection[str]] = None 3623 ) -> t.Optional[exp.Expression]: 3624 if not self._match_texts(constraints or self.CONSTRAINT_PARSERS): 3625 return None 3626 3627 constraint = self._prev.text.upper() 3628 if constraint not in self.CONSTRAINT_PARSERS: 3629 self.raise_error(f"No parser found for schema constraint {constraint}.") 3630 3631 return self.CONSTRAINT_PARSERS[constraint](self) 3632 3633 def _parse_unique(self) -> exp.UniqueColumnConstraint: 3634 self._match_text_seq("KEY") 3635 return self.expression( 3636 exp.UniqueColumnConstraint, this=self._parse_schema(self._parse_id_var(any_token=False)) 3637 ) 3638 3639 def _parse_key_constraint_options(self) -> t.List[str]: 3640 options = [] 3641 while True: 3642 if not self._curr: 3643 break 3644 3645 if self._match(TokenType.ON): 3646 action = None 3647 on = self._advance_any() and self._prev.text 3648 3649 if self._match_text_seq("NO", "ACTION"): 3650 action = "NO ACTION" 3651 elif self._match_text_seq("CASCADE"): 3652 action = "CASCADE" 3653 elif self._match_pair(TokenType.SET, TokenType.NULL): 3654 action = "SET NULL" 3655 elif self._match_pair(TokenType.SET, TokenType.DEFAULT): 3656 action = "SET DEFAULT" 3657 else: 3658 self.raise_error("Invalid key constraint") 3659 3660 options.append(f"ON {on} {action}") 3661 elif self._match_text_seq("NOT", "ENFORCED"): 3662 options.append("NOT ENFORCED") 3663 elif self._match_text_seq("DEFERRABLE"): 3664 options.append("DEFERRABLE") 3665 elif self._match_text_seq("INITIALLY", "DEFERRED"): 3666 options.append("INITIALLY DEFERRED") 3667 elif self._match_text_seq("NORELY"): 3668 options.append("NORELY") 3669 elif self._match_text_seq("MATCH", "FULL"): 3670 options.append("MATCH FULL") 3671 else: 3672 break 3673 3674 return options 3675 3676 def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]: 3677 if match and not self._match(TokenType.REFERENCES): 3678 return None 3679 3680 expressions = None 3681 this = self._parse_table(schema=True) 3682 options = self._parse_key_constraint_options() 3683 return self.expression(exp.Reference, this=this, expressions=expressions, options=options) 3684 3685 def _parse_foreign_key(self) -> exp.ForeignKey: 3686 expressions = self._parse_wrapped_id_vars() 3687 reference = self._parse_references() 3688 options = {} 3689 3690 while self._match(TokenType.ON): 3691 if not self._match_set((TokenType.DELETE, TokenType.UPDATE)): 3692 self.raise_error("Expected DELETE or UPDATE") 3693 3694 kind = self._prev.text.lower() 3695 3696 if self._match_text_seq("NO", "ACTION"): 3697 action = "NO ACTION" 3698 elif self._match(TokenType.SET): 3699 self._match_set((TokenType.NULL, TokenType.DEFAULT)) 3700 action 
= "SET " + self._prev.text.upper() 3701 else: 3702 self._advance() 3703 action = self._prev.text.upper() 3704 3705 options[kind] = action 3706 3707 return self.expression( 3708 exp.ForeignKey, expressions=expressions, reference=reference, **options # type: ignore 3709 ) 3710 3711 def _parse_primary_key( 3712 self, wrapped_optional: bool = False, in_props: bool = False 3713 ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey: 3714 desc = ( 3715 self._match_set((TokenType.ASC, TokenType.DESC)) 3716 and self._prev.token_type == TokenType.DESC 3717 ) 3718 3719 if not in_props and not self._match(TokenType.L_PAREN, advance=False): 3720 return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc) 3721 3722 expressions = self._parse_wrapped_csv(self._parse_field, optional=wrapped_optional) 3723 options = self._parse_key_constraint_options() 3724 return self.expression(exp.PrimaryKey, expressions=expressions, options=options) 3725 3726 def _parse_bracket(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3727 if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)): 3728 return this 3729 3730 bracket_kind = self._prev.token_type 3731 3732 if self._match(TokenType.COLON): 3733 expressions: t.List[t.Optional[exp.Expression]] = [ 3734 self.expression(exp.Slice, expression=self._parse_conjunction()) 3735 ] 3736 else: 3737 expressions = self._parse_csv( 3738 lambda: self._parse_slice( 3739 self._parse_alias(self._parse_conjunction(), explicit=True) 3740 ) 3741 ) 3742 3743 # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs 3744 if bracket_kind == TokenType.L_BRACE: 3745 this = self.expression(exp.Struct, expressions=expressions) 3746 elif not this or this.name.upper() == "ARRAY": 3747 this = self.expression(exp.Array, expressions=expressions) 3748 else: 3749 expressions = apply_index_offset(this, expressions, -self.INDEX_OFFSET) 3750 this = self.expression(exp.Bracket, this=this, expressions=expressions) 3751 3752 if not self._match(TokenType.R_BRACKET) and bracket_kind == TokenType.L_BRACKET: 3753 self.raise_error("Expected ]") 3754 elif not self._match(TokenType.R_BRACE) and bracket_kind == TokenType.L_BRACE: 3755 self.raise_error("Expected }") 3756 3757 self._add_comments(this) 3758 return self._parse_bracket(this) 3759 3760 def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3761 if self._match(TokenType.COLON): 3762 return self.expression(exp.Slice, this=this, expression=self._parse_conjunction()) 3763 return this 3764 3765 def _parse_case(self) -> t.Optional[exp.Expression]: 3766 ifs = [] 3767 default = None 3768 3769 comments = self._prev_comments 3770 expression = self._parse_conjunction() 3771 3772 while self._match(TokenType.WHEN): 3773 this = self._parse_conjunction() 3774 self._match(TokenType.THEN) 3775 then = self._parse_conjunction() 3776 ifs.append(self.expression(exp.If, this=this, true=then)) 3777 3778 if self._match(TokenType.ELSE): 3779 default = self._parse_conjunction() 3780 3781 if not self._match(TokenType.END): 3782 self.raise_error("Expected END after CASE", self._prev) 3783 3784 return self._parse_window( 3785 self.expression(exp.Case, comments=comments, this=expression, ifs=ifs, default=default) 3786 ) 3787 3788 def _parse_if(self) -> t.Optional[exp.Expression]: 3789 if self._match(TokenType.L_PAREN): 3790 args = self._parse_csv(self._parse_conjunction) 3791 this = self.validate_expression(exp.If.from_arg_list(args), args) 3792 self._match_r_paren() 3793 else: 3794 index = self._index - 1 
3795 condition = self._parse_conjunction() 3796 3797 if not condition: 3798 self._retreat(index) 3799 return None 3800 3801 self._match(TokenType.THEN) 3802 true = self._parse_conjunction() 3803 false = self._parse_conjunction() if self._match(TokenType.ELSE) else None 3804 self._match(TokenType.END) 3805 this = self.expression(exp.If, this=condition, true=true, false=false) 3806 3807 return self._parse_window(this) 3808 3809 def _parse_next_value_for(self) -> t.Optional[exp.Expression]: 3810 if not self._match_text_seq("VALUE", "FOR"): 3811 self._retreat(self._index - 1) 3812 return None 3813 3814 return self.expression( 3815 exp.NextValueFor, 3816 this=self._parse_column(), 3817 order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order), 3818 ) 3819 3820 def _parse_extract(self) -> exp.Extract: 3821 this = self._parse_function() or self._parse_var() or self._parse_type() 3822 3823 if self._match(TokenType.FROM): 3824 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 3825 3826 if not self._match(TokenType.COMMA): 3827 self.raise_error("Expected FROM or comma after EXTRACT", self._prev) 3828 3829 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 3830 3831 def _parse_any_value(self) -> exp.AnyValue: 3832 this = self._parse_lambda() 3833 is_max = None 3834 having = None 3835 3836 if self._match(TokenType.HAVING): 3837 self._match_texts(("MAX", "MIN")) 3838 is_max = self._prev.text == "MAX" 3839 having = self._parse_column() 3840 3841 return self.expression(exp.AnyValue, this=this, having=having, max=is_max) 3842 3843 def _parse_cast(self, strict: bool) -> exp.Expression: 3844 this = self._parse_conjunction() 3845 3846 if not self._match(TokenType.ALIAS): 3847 if self._match(TokenType.COMMA): 3848 return self.expression( 3849 exp.CastToStrType, this=this, expression=self._parse_string() 3850 ) 3851 else: 3852 self.raise_error("Expected AS after CAST") 3853 3854 fmt = None 3855 to = self._parse_types() 3856 3857 if not to: 3858 self.raise_error("Expected TYPE after CAST") 3859 elif to.this == exp.DataType.Type.CHAR: 3860 if self._match(TokenType.CHARACTER_SET): 3861 to = self.expression(exp.CharacterSet, this=self._parse_var_or_string()) 3862 elif self._match(TokenType.FORMAT): 3863 fmt_string = self._parse_string() 3864 fmt = self._parse_at_time_zone(fmt_string) 3865 3866 if to.this in exp.DataType.TEMPORAL_TYPES: 3867 this = self.expression( 3868 exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime, 3869 this=this, 3870 format=exp.Literal.string( 3871 format_time( 3872 fmt_string.this if fmt_string else "", 3873 self.FORMAT_MAPPING or self.TIME_MAPPING, 3874 self.FORMAT_TRIE or self.TIME_TRIE, 3875 ) 3876 ), 3877 ) 3878 3879 if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime): 3880 this.set("zone", fmt.args["zone"]) 3881 3882 return this 3883 3884 return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, format=fmt) 3885 3886 def _parse_concat(self) -> t.Optional[exp.Expression]: 3887 args = self._parse_csv(self._parse_conjunction) 3888 if self.CONCAT_NULL_OUTPUTS_STRING: 3889 args = [ 3890 exp.func("COALESCE", exp.cast(arg, "text"), exp.Literal.string("")) 3891 for arg in args 3892 if arg 3893 ] 3894 3895 # Some dialects (e.g. Trino) don't allow a single-argument CONCAT call, so when 3896 # we find such a call we replace it with its argument. 
3897 if len(args) == 1: 3898 return args[0] 3899 3900 return self.expression( 3901 exp.Concat if self.STRICT_STRING_CONCAT else exp.SafeConcat, expressions=args 3902 ) 3903 3904 def _parse_string_agg(self) -> exp.Expression: 3905 if self._match(TokenType.DISTINCT): 3906 args: t.List[t.Optional[exp.Expression]] = [ 3907 self.expression(exp.Distinct, expressions=[self._parse_conjunction()]) 3908 ] 3909 if self._match(TokenType.COMMA): 3910 args.extend(self._parse_csv(self._parse_conjunction)) 3911 else: 3912 args = self._parse_csv(self._parse_conjunction) 3913 3914 index = self._index 3915 if not self._match(TokenType.R_PAREN) and args: 3916 # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]]) 3917 # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n]) 3918 args[-1] = self._parse_limit(this=self._parse_order(this=args[-1])) 3919 return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1)) 3920 3921 # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]). 3922 # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that 3923 # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them. 3924 if not self._match_text_seq("WITHIN", "GROUP"): 3925 self._retreat(index) 3926 return self.validate_expression(exp.GroupConcat.from_arg_list(args), args) 3927 3928 self._match_l_paren() # The corresponding match_r_paren will be called in parse_function (caller) 3929 order = self._parse_order(this=seq_get(args, 0)) 3930 return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1)) 3931 3932 def _parse_convert(self, strict: bool) -> t.Optional[exp.Expression]: 3933 this = self._parse_bitwise() 3934 3935 if self._match(TokenType.USING): 3936 to: t.Optional[exp.Expression] = self.expression( 3937 exp.CharacterSet, this=self._parse_var() 3938 ) 3939 elif self._match(TokenType.COMMA): 3940 to = self._parse_types() 3941 else: 3942 to = None 3943 3944 return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to) 3945 3946 def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]: 3947 """ 3948 There are generally two variants of the DECODE function: 3949 3950 - DECODE(bin, charset) 3951 - DECODE(expression, search, result [, search, result] ... [, default]) 3952 3953 The second variant will always be parsed into a CASE expression. Note that NULL 3954 needs special treatment, since we need to explicitly check for it with `IS NULL`, 3955 instead of relying on pattern matching. 
3956 """ 3957 args = self._parse_csv(self._parse_conjunction) 3958 3959 if len(args) < 3: 3960 return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1)) 3961 3962 expression, *expressions = args 3963 if not expression: 3964 return None 3965 3966 ifs = [] 3967 for search, result in zip(expressions[::2], expressions[1::2]): 3968 if not search or not result: 3969 return None 3970 3971 if isinstance(search, exp.Literal): 3972 ifs.append( 3973 exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result) 3974 ) 3975 elif isinstance(search, exp.Null): 3976 ifs.append( 3977 exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result) 3978 ) 3979 else: 3980 cond = exp.or_( 3981 exp.EQ(this=expression.copy(), expression=search), 3982 exp.and_( 3983 exp.Is(this=expression.copy(), expression=exp.Null()), 3984 exp.Is(this=search.copy(), expression=exp.Null()), 3985 copy=False, 3986 ), 3987 copy=False, 3988 ) 3989 ifs.append(exp.If(this=cond, true=result)) 3990 3991 return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None) 3992 3993 def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]: 3994 self._match_text_seq("KEY") 3995 key = self._parse_field() 3996 self._match(TokenType.COLON) 3997 self._match_text_seq("VALUE") 3998 value = self._parse_field() 3999 4000 if not key and not value: 4001 return None 4002 return self.expression(exp.JSONKeyValue, this=key, expression=value) 4003 4004 def _parse_json_object(self) -> exp.JSONObject: 4005 star = self._parse_star() 4006 expressions = [star] if star else self._parse_csv(self._parse_json_key_value) 4007 4008 null_handling = None 4009 if self._match_text_seq("NULL", "ON", "NULL"): 4010 null_handling = "NULL ON NULL" 4011 elif self._match_text_seq("ABSENT", "ON", "NULL"): 4012 null_handling = "ABSENT ON NULL" 4013 4014 unique_keys = None 4015 if self._match_text_seq("WITH", "UNIQUE"): 4016 unique_keys = True 4017 elif self._match_text_seq("WITHOUT", "UNIQUE"): 4018 unique_keys = False 4019 4020 self._match_text_seq("KEYS") 4021 4022 return_type = self._match_text_seq("RETURNING") and self._parse_type() 4023 format_json = self._match_text_seq("FORMAT", "JSON") 4024 encoding = self._match_text_seq("ENCODING") and self._parse_var() 4025 4026 return self.expression( 4027 exp.JSONObject, 4028 expressions=expressions, 4029 null_handling=null_handling, 4030 unique_keys=unique_keys, 4031 return_type=return_type, 4032 format_json=format_json, 4033 encoding=encoding, 4034 ) 4035 4036 def _parse_logarithm(self) -> exp.Func: 4037 # Default argument order is base, expression 4038 args = self._parse_csv(self._parse_range) 4039 4040 if len(args) > 1: 4041 if not self.LOG_BASE_FIRST: 4042 args.reverse() 4043 return exp.Log.from_arg_list(args) 4044 4045 return self.expression( 4046 exp.Ln if self.LOG_DEFAULTS_TO_LN else exp.Log, this=seq_get(args, 0) 4047 ) 4048 4049 def _parse_match_against(self) -> exp.MatchAgainst: 4050 expressions = self._parse_csv(self._parse_column) 4051 4052 self._match_text_seq(")", "AGAINST", "(") 4053 4054 this = self._parse_string() 4055 4056 if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"): 4057 modifier = "IN NATURAL LANGUAGE MODE" 4058 if self._match_text_seq("WITH", "QUERY", "EXPANSION"): 4059 modifier = f"{modifier} WITH QUERY EXPANSION" 4060 elif self._match_text_seq("IN", "BOOLEAN", "MODE"): 4061 modifier = "IN BOOLEAN MODE" 4062 elif self._match_text_seq("WITH", "QUERY", "EXPANSION"): 4063 modifier = "WITH QUERY EXPANSION" 4064 
else: 4065 modifier = None 4066 4067 return self.expression( 4068 exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier 4069 ) 4070 4071 # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16 4072 def _parse_open_json(self) -> exp.OpenJSON: 4073 this = self._parse_bitwise() 4074 path = self._match(TokenType.COMMA) and self._parse_string() 4075 4076 def _parse_open_json_column_def() -> exp.OpenJSONColumnDef: 4077 this = self._parse_field(any_token=True) 4078 kind = self._parse_types() 4079 path = self._parse_string() 4080 as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON) 4081 4082 return self.expression( 4083 exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json 4084 ) 4085 4086 expressions = None 4087 if self._match_pair(TokenType.R_PAREN, TokenType.WITH): 4088 self._match_l_paren() 4089 expressions = self._parse_csv(_parse_open_json_column_def) 4090 4091 return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions) 4092 4093 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 4094 args = self._parse_csv(self._parse_bitwise) 4095 4096 if self._match(TokenType.IN): 4097 return self.expression( 4098 exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0) 4099 ) 4100 4101 if haystack_first: 4102 haystack = seq_get(args, 0) 4103 needle = seq_get(args, 1) 4104 else: 4105 needle = seq_get(args, 0) 4106 haystack = seq_get(args, 1) 4107 4108 return self.expression( 4109 exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2) 4110 ) 4111 4112 def _parse_join_hint(self, func_name: str) -> exp.JoinHint: 4113 args = self._parse_csv(self._parse_table) 4114 return exp.JoinHint(this=func_name.upper(), expressions=args) 4115 4116 def _parse_substring(self) -> exp.Substring: 4117 # Postgres supports the form: substring(string [from int] [for int]) 4118 # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6 4119 4120 args = self._parse_csv(self._parse_bitwise) 4121 4122 if self._match(TokenType.FROM): 4123 args.append(self._parse_bitwise()) 4124 if self._match(TokenType.FOR): 4125 args.append(self._parse_bitwise()) 4126 4127 return self.validate_expression(exp.Substring.from_arg_list(args), args) 4128 4129 def _parse_trim(self) -> exp.Trim: 4130 # https://www.w3resource.com/sql/character-functions/trim.php 4131 # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html 4132 4133 position = None 4134 collation = None 4135 4136 if self._match_texts(self.TRIM_TYPES): 4137 position = self._prev.text.upper() 4138 4139 expression = self._parse_bitwise() 4140 if self._match_set((TokenType.FROM, TokenType.COMMA)): 4141 this = self._parse_bitwise() 4142 else: 4143 this = expression 4144 expression = None 4145 4146 if self._match(TokenType.COLLATE): 4147 collation = self._parse_bitwise() 4148 4149 return self.expression( 4150 exp.Trim, this=this, position=position, expression=expression, collation=collation 4151 ) 4152 4153 def _parse_window_clause(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]: 4154 return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window) 4155 4156 def _parse_named_window(self) -> t.Optional[exp.Expression]: 4157 return self._parse_window(self._parse_id_var(), alias=True) 4158 4159 def _parse_respect_or_ignore_nulls( 4160 self, this: t.Optional[exp.Expression] 4161 ) -> t.Optional[exp.Expression]: 4162 if self._match_text_seq("IGNORE", "NULLS"): 4163 return 
self.expression(exp.IgnoreNulls, this=this) 4164 if self._match_text_seq("RESPECT", "NULLS"): 4165 return self.expression(exp.RespectNulls, this=this) 4166 return this 4167 4168 def _parse_window( 4169 self, this: t.Optional[exp.Expression], alias: bool = False 4170 ) -> t.Optional[exp.Expression]: 4171 if self._match_pair(TokenType.FILTER, TokenType.L_PAREN): 4172 self._match(TokenType.WHERE) 4173 this = self.expression( 4174 exp.Filter, this=this, expression=self._parse_where(skip_where_token=True) 4175 ) 4176 self._match_r_paren() 4177 4178 # T-SQL allows the OVER (...) syntax after WITHIN GROUP. 4179 # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16 4180 if self._match_text_seq("WITHIN", "GROUP"): 4181 order = self._parse_wrapped(self._parse_order) 4182 this = self.expression(exp.WithinGroup, this=this, expression=order) 4183 4184 # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER 4185 # Some dialects choose to implement and some do not. 4186 # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html 4187 4188 # There is some code above in _parse_lambda that handles 4189 # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ... 4190 4191 # The below changes handle 4192 # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ... 4193 4194 # Oracle allows both formats 4195 # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html) 4196 # and Snowflake chose to do the same for familiarity 4197 # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes 4198 this = self._parse_respect_or_ignore_nulls(this) 4199 4200 # bigquery select from window x AS (partition by ...) 4201 if alias: 4202 over = None 4203 self._match(TokenType.ALIAS) 4204 elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS): 4205 return this 4206 else: 4207 over = self._prev.text.upper() 4208 4209 if not self._match(TokenType.L_PAREN): 4210 return self.expression( 4211 exp.Window, this=this, alias=self._parse_id_var(False), over=over 4212 ) 4213 4214 window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS) 4215 4216 first = self._match(TokenType.FIRST) 4217 if self._match_text_seq("LAST"): 4218 first = False 4219 4220 partition = self._parse_partition_by() 4221 order = self._parse_order() 4222 kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text 4223 4224 if kind: 4225 self._match(TokenType.BETWEEN) 4226 start = self._parse_window_spec() 4227 self._match(TokenType.AND) 4228 end = self._parse_window_spec() 4229 4230 spec = self.expression( 4231 exp.WindowSpec, 4232 kind=kind, 4233 start=start["value"], 4234 start_side=start["side"], 4235 end=end["value"], 4236 end_side=end["side"], 4237 ) 4238 else: 4239 spec = None 4240 4241 self._match_r_paren() 4242 4243 window = self.expression( 4244 exp.Window, 4245 this=this, 4246 partition_by=partition, 4247 order=order, 4248 spec=spec, 4249 alias=window_alias, 4250 over=over, 4251 first=first, 4252 ) 4253 4254 # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...) 
4255 if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False): 4256 return self._parse_window(window, alias=alias) 4257 4258 return window 4259 4260 def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]: 4261 self._match(TokenType.BETWEEN) 4262 4263 return { 4264 "value": ( 4265 (self._match_text_seq("UNBOUNDED") and "UNBOUNDED") 4266 or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW") 4267 or self._parse_bitwise() 4268 ), 4269 "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text, 4270 } 4271 4272 def _parse_alias( 4273 self, this: t.Optional[exp.Expression], explicit: bool = False 4274 ) -> t.Optional[exp.Expression]: 4275 any_token = self._match(TokenType.ALIAS) 4276 4277 if explicit and not any_token: 4278 return this 4279 4280 if self._match(TokenType.L_PAREN): 4281 aliases = self.expression( 4282 exp.Aliases, 4283 this=this, 4284 expressions=self._parse_csv(lambda: self._parse_id_var(any_token)), 4285 ) 4286 self._match_r_paren(aliases) 4287 return aliases 4288 4289 alias = self._parse_id_var(any_token) 4290 4291 if alias: 4292 return self.expression(exp.Alias, this=this, alias=alias) 4293 4294 return this 4295 4296 def _parse_id_var( 4297 self, 4298 any_token: bool = True, 4299 tokens: t.Optional[t.Collection[TokenType]] = None, 4300 ) -> t.Optional[exp.Expression]: 4301 identifier = self._parse_identifier() 4302 4303 if identifier: 4304 return identifier 4305 4306 if (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS): 4307 quoted = self._prev.token_type == TokenType.STRING 4308 return exp.Identifier(this=self._prev.text, quoted=quoted) 4309 4310 return None 4311 4312 def _parse_string(self) -> t.Optional[exp.Expression]: 4313 if self._match(TokenType.STRING): 4314 return self.PRIMARY_PARSERS[TokenType.STRING](self, self._prev) 4315 return self._parse_placeholder() 4316 4317 def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]: 4318 return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True) 4319 4320 def _parse_number(self) -> t.Optional[exp.Expression]: 4321 if self._match(TokenType.NUMBER): 4322 return self.PRIMARY_PARSERS[TokenType.NUMBER](self, self._prev) 4323 return self._parse_placeholder() 4324 4325 def _parse_identifier(self) -> t.Optional[exp.Expression]: 4326 if self._match(TokenType.IDENTIFIER): 4327 return self.expression(exp.Identifier, this=self._prev.text, quoted=True) 4328 return self._parse_placeholder() 4329 4330 def _parse_var( 4331 self, any_token: bool = False, tokens: t.Optional[t.Collection[TokenType]] = None 4332 ) -> t.Optional[exp.Expression]: 4333 if ( 4334 (any_token and self._advance_any()) 4335 or self._match(TokenType.VAR) 4336 or (self._match_set(tokens) if tokens else False) 4337 ): 4338 return self.expression(exp.Var, this=self._prev.text) 4339 return self._parse_placeholder() 4340 4341 def _advance_any(self) -> t.Optional[Token]: 4342 if self._curr and self._curr.token_type not in self.RESERVED_KEYWORDS: 4343 self._advance() 4344 return self._prev 4345 return None 4346 4347 def _parse_var_or_string(self) -> t.Optional[exp.Expression]: 4348 return self._parse_var() or self._parse_string() 4349 4350 def _parse_null(self) -> t.Optional[exp.Expression]: 4351 if self._match(TokenType.NULL): 4352 return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev) 4353 return self._parse_placeholder() 4354 4355 def _parse_boolean(self) -> t.Optional[exp.Expression]: 4356 if self._match(TokenType.TRUE): 4357 return 
self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev) 4358 if self._match(TokenType.FALSE): 4359 return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev) 4360 return self._parse_placeholder() 4361 4362 def _parse_star(self) -> t.Optional[exp.Expression]: 4363 if self._match(TokenType.STAR): 4364 return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev) 4365 return self._parse_placeholder() 4366 4367 def _parse_parameter(self) -> exp.Parameter: 4368 wrapped = self._match(TokenType.L_BRACE) 4369 this = self._parse_var() or self._parse_identifier() or self._parse_primary() 4370 self._match(TokenType.R_BRACE) 4371 return self.expression(exp.Parameter, this=this, wrapped=wrapped) 4372 4373 def _parse_placeholder(self) -> t.Optional[exp.Expression]: 4374 if self._match_set(self.PLACEHOLDER_PARSERS): 4375 placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self) 4376 if placeholder: 4377 return placeholder 4378 self._advance(-1) 4379 return None 4380 4381 def _parse_except(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]: 4382 if not self._match(TokenType.EXCEPT): 4383 return None 4384 if self._match(TokenType.L_PAREN, advance=False): 4385 return self._parse_wrapped_csv(self._parse_column) 4386 return self._parse_csv(self._parse_column) 4387 4388 def _parse_replace(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]: 4389 if not self._match(TokenType.REPLACE): 4390 return None 4391 if self._match(TokenType.L_PAREN, advance=False): 4392 return self._parse_wrapped_csv(self._parse_expression) 4393 return self._parse_expressions() 4394 4395 def _parse_csv( 4396 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA 4397 ) -> t.List[t.Optional[exp.Expression]]: 4398 parse_result = parse_method() 4399 items = [parse_result] if parse_result is not None else [] 4400 4401 while self._match(sep): 4402 self._add_comments(parse_result) 4403 parse_result = parse_method() 4404 if parse_result is not None: 4405 items.append(parse_result) 4406 4407 return items 4408 4409 def _parse_tokens( 4410 self, parse_method: t.Callable, expressions: t.Dict 4411 ) -> t.Optional[exp.Expression]: 4412 this = parse_method() 4413 4414 while self._match_set(expressions): 4415 this = self.expression( 4416 expressions[self._prev.token_type], 4417 this=this, 4418 comments=self._prev_comments, 4419 expression=parse_method(), 4420 ) 4421 4422 return this 4423 4424 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[t.Optional[exp.Expression]]: 4425 return self._parse_wrapped_csv(self._parse_id_var, optional=optional) 4426 4427 def _parse_wrapped_csv( 4428 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False 4429 ) -> t.List[t.Optional[exp.Expression]]: 4430 return self._parse_wrapped( 4431 lambda: self._parse_csv(parse_method, sep=sep), optional=optional 4432 ) 4433 4434 def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any: 4435 wrapped = self._match(TokenType.L_PAREN) 4436 if not wrapped and not optional: 4437 self.raise_error("Expecting (") 4438 parse_result = parse_method() 4439 if wrapped: 4440 self._match_r_paren() 4441 return parse_result 4442 4443 def _parse_expressions(self) -> t.List[t.Optional[exp.Expression]]: 4444 return self._parse_csv(self._parse_expression) 4445 4446 def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]: 4447 return self._parse_select() or self._parse_set_operations( 4448 self._parse_expression() if alias else self._parse_conjunction() 4449 ) 
4450 4451 def _parse_ddl_select(self) -> t.Optional[exp.Expression]: 4452 return self._parse_query_modifiers( 4453 self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False)) 4454 ) 4455 4456 def _parse_transaction(self) -> exp.Transaction | exp.Command: 4457 this = None 4458 if self._match_texts(self.TRANSACTION_KIND): 4459 this = self._prev.text 4460 4461 self._match_texts({"TRANSACTION", "WORK"}) 4462 4463 modes = [] 4464 while True: 4465 mode = [] 4466 while self._match(TokenType.VAR): 4467 mode.append(self._prev.text) 4468 4469 if mode: 4470 modes.append(" ".join(mode)) 4471 if not self._match(TokenType.COMMA): 4472 break 4473 4474 return self.expression(exp.Transaction, this=this, modes=modes) 4475 4476 def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback: 4477 chain = None 4478 savepoint = None 4479 is_rollback = self._prev.token_type == TokenType.ROLLBACK 4480 4481 self._match_texts({"TRANSACTION", "WORK"}) 4482 4483 if self._match_text_seq("TO"): 4484 self._match_text_seq("SAVEPOINT") 4485 savepoint = self._parse_id_var() 4486 4487 if self._match(TokenType.AND): 4488 chain = not self._match_text_seq("NO") 4489 self._match_text_seq("CHAIN") 4490 4491 if is_rollback: 4492 return self.expression(exp.Rollback, savepoint=savepoint) 4493 4494 return self.expression(exp.Commit, chain=chain) 4495 4496 def _parse_add_column(self) -> t.Optional[exp.Expression]: 4497 if not self._match_text_seq("ADD"): 4498 return None 4499 4500 self._match(TokenType.COLUMN) 4501 exists_column = self._parse_exists(not_=True) 4502 expression = self._parse_column_def(self._parse_field(any_token=True)) 4503 4504 if expression: 4505 expression.set("exists", exists_column) 4506 4507 # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns 4508 if self._match_texts(("FIRST", "AFTER")): 4509 position = self._prev.text 4510 column_position = self.expression( 4511 exp.ColumnPosition, this=self._parse_column(), position=position 4512 ) 4513 expression.set("position", column_position) 4514 4515 return expression 4516 4517 def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]: 4518 drop = self._match(TokenType.DROP) and self._parse_drop() 4519 if drop and not isinstance(drop, exp.Command): 4520 drop.set("kind", drop.args.get("kind", "COLUMN")) 4521 return drop 4522 4523 # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html 4524 def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition: 4525 return self.expression( 4526 exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists 4527 ) 4528 4529 def _parse_add_constraint(self) -> exp.AddConstraint: 4530 this = None 4531 kind = self._prev.token_type 4532 4533 if kind == TokenType.CONSTRAINT: 4534 this = self._parse_id_var() 4535 4536 if self._match_text_seq("CHECK"): 4537 expression = self._parse_wrapped(self._parse_conjunction) 4538 enforced = self._match_text_seq("ENFORCED") 4539 4540 return self.expression( 4541 exp.AddConstraint, this=this, expression=expression, enforced=enforced 4542 ) 4543 4544 if kind == TokenType.FOREIGN_KEY or self._match(TokenType.FOREIGN_KEY): 4545 expression = self._parse_foreign_key() 4546 elif kind == TokenType.PRIMARY_KEY or self._match(TokenType.PRIMARY_KEY): 4547 expression = self._parse_primary_key() 4548 else: 4549 expression = None 4550 4551 return self.expression(exp.AddConstraint, this=this, expression=expression) 4552 4553 def _parse_alter_table_add(self) -> 
t.List[t.Optional[exp.Expression]]: 4554 index = self._index - 1 4555 4556 if self._match_set(self.ADD_CONSTRAINT_TOKENS): 4557 return self._parse_csv(self._parse_add_constraint) 4558 4559 self._retreat(index) 4560 return self._parse_csv(self._parse_add_column) 4561 4562 def _parse_alter_table_alter(self) -> exp.AlterColumn: 4563 self._match(TokenType.COLUMN) 4564 column = self._parse_field(any_token=True) 4565 4566 if self._match_pair(TokenType.DROP, TokenType.DEFAULT): 4567 return self.expression(exp.AlterColumn, this=column, drop=True) 4568 if self._match_pair(TokenType.SET, TokenType.DEFAULT): 4569 return self.expression(exp.AlterColumn, this=column, default=self._parse_conjunction()) 4570 4571 self._match_text_seq("SET", "DATA") 4572 return self.expression( 4573 exp.AlterColumn, 4574 this=column, 4575 dtype=self._match_text_seq("TYPE") and self._parse_types(), 4576 collate=self._match(TokenType.COLLATE) and self._parse_term(), 4577 using=self._match(TokenType.USING) and self._parse_conjunction(), 4578 ) 4579 4580 def _parse_alter_table_drop(self) -> t.List[t.Optional[exp.Expression]]: 4581 index = self._index - 1 4582 4583 partition_exists = self._parse_exists() 4584 if self._match(TokenType.PARTITION, advance=False): 4585 return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists)) 4586 4587 self._retreat(index) 4588 return self._parse_csv(self._parse_drop_column) 4589 4590 def _parse_alter_table_rename(self) -> exp.RenameTable: 4591 self._match_text_seq("TO") 4592 return self.expression(exp.RenameTable, this=self._parse_table(schema=True)) 4593 4594 def _parse_alter(self) -> exp.AlterTable | exp.Command: 4595 start = self._prev 4596 4597 if not self._match(TokenType.TABLE): 4598 return self._parse_as_command(start) 4599 4600 exists = self._parse_exists() 4601 this = self._parse_table(schema=True) 4602 4603 if self._next: 4604 self._advance() 4605 4606 parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None 4607 if parser: 4608 actions = ensure_list(parser(self)) 4609 4610 if not self._curr: 4611 return self.expression( 4612 exp.AlterTable, 4613 this=this, 4614 exists=exists, 4615 actions=actions, 4616 ) 4617 return self._parse_as_command(start) 4618 4619 def _parse_merge(self) -> exp.Merge: 4620 self._match(TokenType.INTO) 4621 target = self._parse_table() 4622 4623 self._match(TokenType.USING) 4624 using = self._parse_table() 4625 4626 self._match(TokenType.ON) 4627 on = self._parse_conjunction() 4628 4629 whens = [] 4630 while self._match(TokenType.WHEN): 4631 matched = not self._match(TokenType.NOT) 4632 self._match_text_seq("MATCHED") 4633 source = ( 4634 False 4635 if self._match_text_seq("BY", "TARGET") 4636 else self._match_text_seq("BY", "SOURCE") 4637 ) 4638 condition = self._parse_conjunction() if self._match(TokenType.AND) else None 4639 4640 self._match(TokenType.THEN) 4641 4642 if self._match(TokenType.INSERT): 4643 _this = self._parse_star() 4644 if _this: 4645 then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=_this) 4646 else: 4647 then = self.expression( 4648 exp.Insert, 4649 this=self._parse_value(), 4650 expression=self._match(TokenType.VALUES) and self._parse_value(), 4651 ) 4652 elif self._match(TokenType.UPDATE): 4653 expressions = self._parse_star() 4654 if expressions: 4655 then = self.expression(exp.Update, expressions=expressions) 4656 else: 4657 then = self.expression( 4658 exp.Update, 4659 expressions=self._match(TokenType.SET) 4660 and self._parse_csv(self._parse_equality), 4661 ) 4662 elif 
self._match(TokenType.DELETE): 4663 then = self.expression(exp.Var, this=self._prev.text) 4664 else: 4665 then = None 4666 4667 whens.append( 4668 self.expression( 4669 exp.When, 4670 matched=matched, 4671 source=source, 4672 condition=condition, 4673 then=then, 4674 ) 4675 ) 4676 4677 return self.expression( 4678 exp.Merge, 4679 this=target, 4680 using=using, 4681 on=on, 4682 expressions=whens, 4683 ) 4684 4685 def _parse_show(self) -> t.Optional[exp.Expression]: 4686 parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE) 4687 if parser: 4688 return parser(self) 4689 self._advance() 4690 return self.expression(exp.Show, this=self._prev.text.upper()) 4691 4692 def _parse_set_item_assignment( 4693 self, kind: t.Optional[str] = None 4694 ) -> t.Optional[exp.Expression]: 4695 index = self._index 4696 4697 if kind in {"GLOBAL", "SESSION"} and self._match_text_seq("TRANSACTION"): 4698 return self._parse_set_transaction(global_=kind == "GLOBAL") 4699 4700 left = self._parse_primary() or self._parse_id_var() 4701 4702 if not self._match_texts(("=", "TO")): 4703 self._retreat(index) 4704 return None 4705 4706 right = self._parse_statement() or self._parse_id_var() 4707 this = self.expression(exp.EQ, this=left, expression=right) 4708 4709 return self.expression(exp.SetItem, this=this, kind=kind) 4710 4711 def _parse_set_transaction(self, global_: bool = False) -> exp.Expression: 4712 self._match_text_seq("TRANSACTION") 4713 characteristics = self._parse_csv( 4714 lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS) 4715 ) 4716 return self.expression( 4717 exp.SetItem, 4718 expressions=characteristics, 4719 kind="TRANSACTION", 4720 **{"global": global_}, # type: ignore 4721 ) 4722 4723 def _parse_set_item(self) -> t.Optional[exp.Expression]: 4724 parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE) 4725 return parser(self) if parser else self._parse_set_item_assignment(kind=None) 4726 4727 def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command: 4728 index = self._index 4729 set_ = self.expression( 4730 exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag 4731 ) 4732 4733 if self._curr: 4734 self._retreat(index) 4735 return self._parse_as_command(self._prev) 4736 4737 return set_ 4738 4739 def _parse_var_from_options(self, options: t.Collection[str]) -> t.Optional[exp.Var]: 4740 for option in options: 4741 if self._match_text_seq(*option.split(" ")): 4742 return exp.var(option) 4743 return None 4744 4745 def _parse_as_command(self, start: Token) -> exp.Command: 4746 while self._curr: 4747 self._advance() 4748 text = self._find_sql(start, self._prev) 4749 size = len(start.text) 4750 return exp.Command(this=text[:size], expression=text[size:]) 4751 4752 def _parse_dict_property(self, this: str) -> exp.DictProperty: 4753 settings = [] 4754 4755 self._match_l_paren() 4756 kind = self._parse_id_var() 4757 4758 if self._match(TokenType.L_PAREN): 4759 while True: 4760 key = self._parse_id_var() 4761 value = self._parse_primary() 4762 4763 if not key and value is None: 4764 break 4765 settings.append(self.expression(exp.DictSubProperty, this=key, value=value)) 4766 self._match(TokenType.R_PAREN) 4767 4768 self._match_r_paren() 4769 4770 return self.expression( 4771 exp.DictProperty, 4772 this=this, 4773 kind=kind.this if kind else None, 4774 settings=settings, 4775 ) 4776 4777 def _parse_dict_range(self, this: str) -> exp.DictRange: 4778 self._match_l_paren() 4779 has_min = self._match_text_seq("MIN") 4780 if has_min: 
4781 min = self._parse_var() or self._parse_primary() 4782 self._match_text_seq("MAX") 4783 max = self._parse_var() or self._parse_primary() 4784 else: 4785 max = self._parse_var() or self._parse_primary() 4786 min = exp.Literal.number(0) 4787 self._match_r_paren() 4788 return self.expression(exp.DictRange, this=this, min=min, max=max) 4789 4790 def _find_parser( 4791 self, parsers: t.Dict[str, t.Callable], trie: t.Dict 4792 ) -> t.Optional[t.Callable]: 4793 if not self._curr: 4794 return None 4795 4796 index = self._index 4797 this = [] 4798 while True: 4799 # The current token might be multiple words 4800 curr = self._curr.text.upper() 4801 key = curr.split(" ") 4802 this.append(curr) 4803 4804 self._advance() 4805 result, trie = in_trie(trie, key) 4806 if result == TrieResult.FAILED: 4807 break 4808 4809 if result == TrieResult.EXISTS: 4810 subparser = parsers[" ".join(this)] 4811 return subparser 4812 4813 self._retreat(index) 4814 return None 4815 4816 def _match(self, token_type, advance=True, expression=None): 4817 if not self._curr: 4818 return None 4819 4820 if self._curr.token_type == token_type: 4821 if advance: 4822 self._advance() 4823 self._add_comments(expression) 4824 return True 4825 4826 return None 4827 4828 def _match_set(self, types, advance=True): 4829 if not self._curr: 4830 return None 4831 4832 if self._curr.token_type in types: 4833 if advance: 4834 self._advance() 4835 return True 4836 4837 return None 4838 4839 def _match_pair(self, token_type_a, token_type_b, advance=True): 4840 if not self._curr or not self._next: 4841 return None 4842 4843 if self._curr.token_type == token_type_a and self._next.token_type == token_type_b: 4844 if advance: 4845 self._advance(2) 4846 return True 4847 4848 return None 4849 4850 def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 4851 if not self._match(TokenType.L_PAREN, expression=expression): 4852 self.raise_error("Expecting (") 4853 4854 def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 4855 if not self._match(TokenType.R_PAREN, expression=expression): 4856 self.raise_error("Expecting )") 4857 4858 def _match_texts(self, texts, advance=True): 4859 if self._curr and self._curr.text.upper() in texts: 4860 if advance: 4861 self._advance() 4862 return True 4863 return False 4864 4865 def _match_text_seq(self, *texts, advance=True): 4866 index = self._index 4867 for text in texts: 4868 if self._curr and self._curr.text.upper() == text: 4869 self._advance() 4870 else: 4871 self._retreat(index) 4872 return False 4873 4874 if not advance: 4875 self._retreat(index) 4876 4877 return True 4878 4879 @t.overload 4880 def _replace_columns_with_dots(self, this: exp.Expression) -> exp.Expression: 4881 ... 4882 4883 @t.overload 4884 def _replace_columns_with_dots( 4885 self, this: t.Optional[exp.Expression] 4886 ) -> t.Optional[exp.Expression]: 4887 ... 
4888 4889 def _replace_columns_with_dots(self, this): 4890 if isinstance(this, exp.Dot): 4891 exp.replace_children(this, self._replace_columns_with_dots) 4892 elif isinstance(this, exp.Column): 4893 exp.replace_children(this, self._replace_columns_with_dots) 4894 table = this.args.get("table") 4895 this = ( 4896 self.expression(exp.Dot, this=table, expression=this.this) if table else this.this 4897 ) 4898 4899 return this 4900 4901 def _replace_lambda( 4902 self, node: t.Optional[exp.Expression], lambda_variables: t.Set[str] 4903 ) -> t.Optional[exp.Expression]: 4904 if not node: 4905 return node 4906 4907 for column in node.find_all(exp.Column): 4908 if column.parts[0].name in lambda_variables: 4909 dot_or_id = column.to_dot() if column.table else column.this 4910 parent = column.parent 4911 4912 while isinstance(parent, exp.Dot): 4913 if not isinstance(parent.parent, exp.Dot): 4914 parent.replace(dot_or_id) 4915 break 4916 parent = parent.parent 4917 else: 4918 if column is node: 4919 node = dot_or_id 4920 else: 4921 column.replace(dot_or_id) 4922 return node
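The private helpers above are easiest to see in action through sqlglot's public API. A minimal sketch, not part of the library source, assuming the default dialect (exact output can vary between sqlglot versions):

    import sqlglot
    from sqlglot import exp

    # _parse_decode rewrites the search/result variant of DECODE into a CASE expression.
    tree = sqlglot.parse_one("SELECT DECODE(x, 1, 'one', 'other') FROM t")
    assert tree.find(exp.Case) is not None

    # _parse_string_agg parses STRING_AGG into exp.GroupConcat, which is what makes
    # transpiling to dialects that spell it GROUP_CONCAT straightforward.
    print(sqlglot.transpile("SELECT STRING_AGG(x, ',') FROM t", write="mysql")[0])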
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: Determines the amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
    def __init__(
        self,
        error_level: t.Optional[ErrorLevel] = None,
        error_message_context: int = 100,
        max_errors: int = 3,
    ):
        self.error_level = error_level or ErrorLevel.IMMEDIATE
        self.error_message_context = error_message_context
        self.max_errors = max_errors
        self.reset()
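For instance, a parser can be configured once and reused across statements, since parsing resets its internal state. A minimal sketch, assuming the default Tokenizer:

    from sqlglot.errors import ErrorLevel
    from sqlglot.parser import Parser
    from sqlglot.tokens import Tokenizer

    # Raise all collected errors at once, with a shorter error context window.
    parser = Parser(error_level=ErrorLevel.RAISE, error_message_context=50, max_errors=5)

    sql = "SELECT a FROM t"
    [tree] = parser.parse(Tokenizer().tokenize(sql), sql)
    print(tree.sql())  # SELECT a FROM t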
    def parse(
        self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens and returns a list of syntax trees, one tree
        per parsed SQL statement.

        Args:
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The list of the produced syntax trees.
        """
        return self._parse(
            parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
        )
Parses a list of tokens and returns a list of syntax trees, one tree per parsed SQL statement.
Arguments:
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The list of the produced syntax trees.
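A hedged sketch of the one-tree-per-statement contract (default dialect assumed):

    from sqlglot import exp
    from sqlglot.parser import Parser
    from sqlglot.tokens import Tokenizer

    sql = "SELECT a FROM t; UPDATE t SET a = 1"
    trees = Parser().parse(Tokenizer().tokenize(sql), sql)

    # Two statements in, two syntax trees out.
    assert len(trees) == 2
    assert isinstance(trees[0], exp.Select) and isinstance(trees[1], exp.Update)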
    def parse_into(
        self,
        expression_types: exp.IntoType,
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens into a given Expression type. If a collection of Expression
        types is given instead, this method will try to parse the token list into each one
        of them, stopping at the first for which the parsing succeeds.

        Args:
            expression_types: The expression type(s) to try and parse the token list into.
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The target Expression.
        """
        errors = []
        for expression_type in ensure_list(expression_types):
            parser = self.EXPRESSION_PARSERS.get(expression_type)
            if not parser:
                raise TypeError(f"No parser registered for {expression_type}")

            try:
                return self._parse(parser, raw_tokens, sql)
            except ParseError as e:
                e.errors[0]["into_expression"] = expression_type
                errors.append(e)

        raise ParseError(
            f"Failed to parse '{sql or raw_tokens}' into {expression_types}",
            errors=merge_errors(errors),
        ) from errors[-1]
Parses a list of tokens into a given Expression type. If a collection of Expression types is given instead, this method will try to parse the token list into each one of them, stopping at the first for which the parsing succeeds.
Arguments:
- expression_types: The expression type(s) to try and parse the token list into.
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The target Expression.
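A minimal sketch of the fallback behavior, assuming exp.From and exp.Where are both registered in EXPRESSION_PARSERS (they are in current sqlglot versions):

    from sqlglot import exp
    from sqlglot.parser import Parser
    from sqlglot.tokens import Tokenizer

    sql = "WHERE x = 1"
    tokens = Tokenizer().tokenize(sql)

    # exp.From fails on this input, so parse_into falls through to exp.Where.
    [where] = Parser().parse_into((exp.From, exp.Where), tokens, sql)
    assert isinstance(where, exp.Where)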
    def check_errors(self) -> None:
        """Logs or raises any found errors, depending on the chosen error level setting."""
        if self.error_level == ErrorLevel.WARN:
            for error in self.errors:
                logger.error(str(error))
        elif self.error_level == ErrorLevel.RAISE and self.errors:
            raise ParseError(
                concat_messages(self.errors, self.max_errors),
                errors=merge_errors(self.errors),
            )
Logs or raises any found errors, depending on the chosen error level setting.
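A sketch of how the two deferred error levels behave on the same invalid input (parsing runs this check after each statement; the exact error message may differ by version):

    from sqlglot.errors import ErrorLevel, ParseError
    from sqlglot.parser import Parser
    from sqlglot.tokens import Tokenizer

    bad_sql = "SELECT (1"  # missing closing parenthesis

    # WARN: the error is recorded and logged, and parsing still returns a result.
    Parser(error_level=ErrorLevel.WARN).parse(Tokenizer().tokenize(bad_sql), bad_sql)

    # RAISE: the recorded errors are raised together as a single ParseError.
    try:
        Parser(error_level=ErrorLevel.RAISE).parse(Tokenizer().tokenize(bad_sql), bad_sql)
    except ParseError as e:
        print(e)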
    def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
        """
        Appends an error to the list of recorded errors or raises it, depending on the chosen
        error level setting.
        """
        token = token or self._curr or self._prev or Token.string("")
        start = token.start
        end = token.end + 1
        start_context = self.sql[max(start - self.error_message_context, 0) : start]
        highlight = self.sql[start:end]
        end_context = self.sql[end : end + self.error_message_context]

        error = ParseError.new(
            f"{message}. Line {token.line}, Col: {token.col}.\n"
            f"  {start_context}\033[4m{highlight}\033[0m{end_context}",
            description=message,
            line=token.line,
            col=token.col,
            start_context=start_context,
            highlight=highlight,
            end_context=end_context,
        )

        if self.error_level == ErrorLevel.IMMEDIATE:
            raise error

        self.errors.append(error)
Appends an error to the list of recorded errors or raises it, depending on the chosen error level setting.
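The structured fields passed to ParseError.new above are available on the raised exception, which is useful for error reporting. A hedged sketch:

    from sqlglot.errors import ErrorLevel, ParseError
    from sqlglot.parser import Parser
    from sqlglot.tokens import Tokenizer

    bad_sql = "SELECT (1"
    try:
        Parser(error_level=ErrorLevel.IMMEDIATE).parse(Tokenizer().tokenize(bad_sql), bad_sql)
    except ParseError as e:
        info = e.errors[0]  # a dict with the kwargs given to ParseError.new
        print(info["description"], info["line"], info["col"], info["highlight"])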
    def expression(
        self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs
    ) -> E:
        """
        Creates a new, validated Expression.

        Args:
            exp_class: The expression class to instantiate.
            comments: An optional list of comments to attach to the expression.
            kwargs: The arguments to set for the expression along with their respective values.

        Returns:
            The target expression.
        """
        instance = exp_class(**kwargs)
        instance.add_comments(comments) if comments else self._add_comments(instance)
        return self.validate_expression(instance)
Creates a new, validated Expression.
Arguments:
- exp_class: The expression class to instantiate.
- comments: An optional list of comments to attach to the expression.
- kwargs: The arguments to set for the expression along with their respective values.
Returns:
The target expression.
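A minimal sketch of the create-then-validate behavior (exp.EQ requires both of its arguments, so the second call should fail validation):

    from sqlglot import exp
    from sqlglot.errors import ErrorLevel, ParseError
    from sqlglot.parser import Parser

    parser = Parser(error_level=ErrorLevel.IMMEDIATE)

    # A well-formed expression is validated and returned.
    eq = parser.expression(exp.EQ, this=exp.column("a"), expression=exp.Literal.number(1))
    assert eq.sql() == "a = 1"

    # Omitting a mandatory argument trips validation, which raises under IMMEDIATE.
    try:
        parser.expression(exp.EQ, this=exp.column("a"))
    except ParseError as e:
        print(e.errors[0]["description"])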
    def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E:
        """
        Validates an Expression, making sure that all its mandatory arguments are set.

        Args:
            expression: The expression to validate.
            args: An optional list of items that was used to instantiate the expression, if it's a Func.

        Returns:
            The validated expression.
        """
        if self.error_level != ErrorLevel.IGNORE:
            for error_message in expression.error_messages(args):
                self.raise_error(error_message)

        return expression
Validates an Expression, making sure that all its mandatory arguments are set.
Arguments:
- expression: The expression to validate.
- args: An optional list of items that was used to instantiate the expression, if it's a Func.
Returns:
The validated expression.
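As the level check above implies, ErrorLevel.IGNORE skips validation entirely; a hedged sketch:

    from sqlglot import exp
    from sqlglot.errors import ErrorLevel
    from sqlglot.parser import Parser

    lenient = Parser(error_level=ErrorLevel.IGNORE)

    # The mandatory 'expression' argument is missing, but no error is recorded or
    # raised; the incomplete expression is returned as-is.
    eq = lenient.expression(exp.EQ, this=exp.column("a"))
    assert eq.args.get("expression") is None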