# sqlglot.parser
1from __future__ import annotations 2 3import logging 4import typing as t 5from collections import defaultdict 6 7from sqlglot import exp 8from sqlglot.errors import ErrorLevel, ParseError, concat_messages, merge_errors 9from sqlglot.helper import apply_index_offset, ensure_list, seq_get 10from sqlglot.time import format_time 11from sqlglot.tokens import Token, Tokenizer, TokenType 12from sqlglot.trie import TrieResult, in_trie, new_trie 13 14if t.TYPE_CHECKING: 15 from sqlglot._typing import E 16 17logger = logging.getLogger("sqlglot") 18 19 20def parse_var_map(args: t.List) -> exp.StarMap | exp.VarMap: 21 if len(args) == 1 and args[0].is_star: 22 return exp.StarMap(this=args[0]) 23 24 keys = [] 25 values = [] 26 for i in range(0, len(args), 2): 27 keys.append(args[i]) 28 values.append(args[i + 1]) 29 30 return exp.VarMap( 31 keys=exp.Array(expressions=keys), 32 values=exp.Array(expressions=values), 33 ) 34 35 36def parse_like(args: t.List) -> exp.Escape | exp.Like: 37 like = exp.Like(this=seq_get(args, 1), expression=seq_get(args, 0)) 38 return exp.Escape(this=like, expression=seq_get(args, 2)) if len(args) > 2 else like 39 40 41def binary_range_parser( 42 expr_type: t.Type[exp.Expression], 43) -> t.Callable[[Parser, t.Optional[exp.Expression]], t.Optional[exp.Expression]]: 44 return lambda self, this: self._parse_escape( 45 self.expression(expr_type, this=this, expression=self._parse_bitwise()) 46 ) 47 48 49class _Parser(type): 50 def __new__(cls, clsname, bases, attrs): 51 klass = super().__new__(cls, clsname, bases, attrs) 52 53 klass.SHOW_TRIE = new_trie(key.split(" ") for key in klass.SHOW_PARSERS) 54 klass.SET_TRIE = new_trie(key.split(" ") for key in klass.SET_PARSERS) 55 56 return klass 57 58 59class Parser(metaclass=_Parser): 60 """ 61 Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree. 62 63 Args: 64 error_level: The desired error level. 
65 Default: ErrorLevel.IMMEDIATE 66 error_message_context: Determines the amount of context to capture from a 67 query string when displaying the error message (in number of characters). 68 Default: 100 69 max_errors: Maximum number of error messages to include in a raised ParseError. 70 This is only relevant if error_level is ErrorLevel.RAISE. 71 Default: 3 72 """ 73 74 FUNCTIONS: t.Dict[str, t.Callable] = { 75 **{name: f.from_arg_list for f in exp.ALL_FUNCTIONS for name in f.sql_names()}, 76 "DATE_TO_DATE_STR": lambda args: exp.Cast( 77 this=seq_get(args, 0), 78 to=exp.DataType(this=exp.DataType.Type.TEXT), 79 ), 80 "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)), 81 "LIKE": parse_like, 82 "TIME_TO_TIME_STR": lambda args: exp.Cast( 83 this=seq_get(args, 0), 84 to=exp.DataType(this=exp.DataType.Type.TEXT), 85 ), 86 "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring( 87 this=exp.Cast( 88 this=seq_get(args, 0), 89 to=exp.DataType(this=exp.DataType.Type.TEXT), 90 ), 91 start=exp.Literal.number(1), 92 length=exp.Literal.number(10), 93 ), 94 "VAR_MAP": parse_var_map, 95 } 96 97 NO_PAREN_FUNCTIONS = { 98 TokenType.CURRENT_DATE: exp.CurrentDate, 99 TokenType.CURRENT_DATETIME: exp.CurrentDate, 100 TokenType.CURRENT_TIME: exp.CurrentTime, 101 TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp, 102 TokenType.CURRENT_USER: exp.CurrentUser, 103 } 104 105 STRUCT_TYPE_TOKENS = { 106 TokenType.NESTED, 107 TokenType.STRUCT, 108 } 109 110 NESTED_TYPE_TOKENS = { 111 TokenType.ARRAY, 112 TokenType.LOWCARDINALITY, 113 TokenType.MAP, 114 TokenType.NULLABLE, 115 *STRUCT_TYPE_TOKENS, 116 } 117 118 ENUM_TYPE_TOKENS = { 119 TokenType.ENUM, 120 TokenType.ENUM8, 121 TokenType.ENUM16, 122 } 123 124 TYPE_TOKENS = { 125 TokenType.BIT, 126 TokenType.BOOLEAN, 127 TokenType.TINYINT, 128 TokenType.UTINYINT, 129 TokenType.SMALLINT, 130 TokenType.USMALLINT, 131 TokenType.INT, 132 TokenType.UINT, 133 TokenType.BIGINT, 134 TokenType.UBIGINT, 135 TokenType.INT128, 136 
TokenType.UINT128, 137 TokenType.INT256, 138 TokenType.UINT256, 139 TokenType.MEDIUMINT, 140 TokenType.UMEDIUMINT, 141 TokenType.FIXEDSTRING, 142 TokenType.FLOAT, 143 TokenType.DOUBLE, 144 TokenType.CHAR, 145 TokenType.NCHAR, 146 TokenType.VARCHAR, 147 TokenType.NVARCHAR, 148 TokenType.TEXT, 149 TokenType.MEDIUMTEXT, 150 TokenType.LONGTEXT, 151 TokenType.MEDIUMBLOB, 152 TokenType.LONGBLOB, 153 TokenType.BINARY, 154 TokenType.VARBINARY, 155 TokenType.JSON, 156 TokenType.JSONB, 157 TokenType.INTERVAL, 158 TokenType.TINYBLOB, 159 TokenType.TINYTEXT, 160 TokenType.TIME, 161 TokenType.TIMETZ, 162 TokenType.TIMESTAMP, 163 TokenType.TIMESTAMP_S, 164 TokenType.TIMESTAMP_MS, 165 TokenType.TIMESTAMP_NS, 166 TokenType.TIMESTAMPTZ, 167 TokenType.TIMESTAMPLTZ, 168 TokenType.DATETIME, 169 TokenType.DATETIME64, 170 TokenType.DATE, 171 TokenType.INT4RANGE, 172 TokenType.INT4MULTIRANGE, 173 TokenType.INT8RANGE, 174 TokenType.INT8MULTIRANGE, 175 TokenType.NUMRANGE, 176 TokenType.NUMMULTIRANGE, 177 TokenType.TSRANGE, 178 TokenType.TSMULTIRANGE, 179 TokenType.TSTZRANGE, 180 TokenType.TSTZMULTIRANGE, 181 TokenType.DATERANGE, 182 TokenType.DATEMULTIRANGE, 183 TokenType.DECIMAL, 184 TokenType.UDECIMAL, 185 TokenType.BIGDECIMAL, 186 TokenType.UUID, 187 TokenType.GEOGRAPHY, 188 TokenType.GEOMETRY, 189 TokenType.HLLSKETCH, 190 TokenType.HSTORE, 191 TokenType.PSEUDO_TYPE, 192 TokenType.SUPER, 193 TokenType.SERIAL, 194 TokenType.SMALLSERIAL, 195 TokenType.BIGSERIAL, 196 TokenType.XML, 197 TokenType.YEAR, 198 TokenType.UNIQUEIDENTIFIER, 199 TokenType.USERDEFINED, 200 TokenType.MONEY, 201 TokenType.SMALLMONEY, 202 TokenType.ROWVERSION, 203 TokenType.IMAGE, 204 TokenType.VARIANT, 205 TokenType.OBJECT, 206 TokenType.OBJECT_IDENTIFIER, 207 TokenType.INET, 208 TokenType.IPADDRESS, 209 TokenType.IPPREFIX, 210 TokenType.UNKNOWN, 211 TokenType.NULL, 212 *ENUM_TYPE_TOKENS, 213 *NESTED_TYPE_TOKENS, 214 } 215 216 SIGNED_TO_UNSIGNED_TYPE_TOKEN = { 217 TokenType.BIGINT: TokenType.UBIGINT, 218 
TokenType.INT: TokenType.UINT, 219 TokenType.MEDIUMINT: TokenType.UMEDIUMINT, 220 TokenType.SMALLINT: TokenType.USMALLINT, 221 TokenType.TINYINT: TokenType.UTINYINT, 222 TokenType.DECIMAL: TokenType.UDECIMAL, 223 } 224 225 SUBQUERY_PREDICATES = { 226 TokenType.ANY: exp.Any, 227 TokenType.ALL: exp.All, 228 TokenType.EXISTS: exp.Exists, 229 TokenType.SOME: exp.Any, 230 } 231 232 RESERVED_KEYWORDS = { 233 *Tokenizer.SINGLE_TOKENS.values(), 234 TokenType.SELECT, 235 } 236 237 DB_CREATABLES = { 238 TokenType.DATABASE, 239 TokenType.SCHEMA, 240 TokenType.TABLE, 241 TokenType.VIEW, 242 TokenType.MODEL, 243 TokenType.DICTIONARY, 244 } 245 246 CREATABLES = { 247 TokenType.COLUMN, 248 TokenType.FUNCTION, 249 TokenType.INDEX, 250 TokenType.PROCEDURE, 251 *DB_CREATABLES, 252 } 253 254 # Tokens that can represent identifiers 255 ID_VAR_TOKENS = { 256 TokenType.VAR, 257 TokenType.ANTI, 258 TokenType.APPLY, 259 TokenType.ASC, 260 TokenType.AUTO_INCREMENT, 261 TokenType.BEGIN, 262 TokenType.CACHE, 263 TokenType.CASE, 264 TokenType.COLLATE, 265 TokenType.COMMAND, 266 TokenType.COMMENT, 267 TokenType.COMMIT, 268 TokenType.CONSTRAINT, 269 TokenType.DEFAULT, 270 TokenType.DELETE, 271 TokenType.DESC, 272 TokenType.DESCRIBE, 273 TokenType.DICTIONARY, 274 TokenType.DIV, 275 TokenType.END, 276 TokenType.EXECUTE, 277 TokenType.ESCAPE, 278 TokenType.FALSE, 279 TokenType.FIRST, 280 TokenType.FILTER, 281 TokenType.FORMAT, 282 TokenType.FULL, 283 TokenType.IS, 284 TokenType.ISNULL, 285 TokenType.INTERVAL, 286 TokenType.KEEP, 287 TokenType.KILL, 288 TokenType.LEFT, 289 TokenType.LOAD, 290 TokenType.MERGE, 291 TokenType.NATURAL, 292 TokenType.NEXT, 293 TokenType.OFFSET, 294 TokenType.ORDINALITY, 295 TokenType.OVERLAPS, 296 TokenType.OVERWRITE, 297 TokenType.PARTITION, 298 TokenType.PERCENT, 299 TokenType.PIVOT, 300 TokenType.PRAGMA, 301 TokenType.RANGE, 302 TokenType.REFERENCES, 303 TokenType.RIGHT, 304 TokenType.ROW, 305 TokenType.ROWS, 306 TokenType.SEMI, 307 TokenType.SET, 308 
TokenType.SETTINGS, 309 TokenType.SHOW, 310 TokenType.TEMPORARY, 311 TokenType.TOP, 312 TokenType.TRUE, 313 TokenType.UNIQUE, 314 TokenType.UNPIVOT, 315 TokenType.UPDATE, 316 TokenType.USE, 317 TokenType.VOLATILE, 318 TokenType.WINDOW, 319 *CREATABLES, 320 *SUBQUERY_PREDICATES, 321 *TYPE_TOKENS, 322 *NO_PAREN_FUNCTIONS, 323 } 324 325 INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END} 326 327 TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - { 328 TokenType.ANTI, 329 TokenType.APPLY, 330 TokenType.ASOF, 331 TokenType.FULL, 332 TokenType.LEFT, 333 TokenType.LOCK, 334 TokenType.NATURAL, 335 TokenType.OFFSET, 336 TokenType.RIGHT, 337 TokenType.SEMI, 338 TokenType.WINDOW, 339 } 340 341 COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS} 342 343 UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET} 344 345 TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"} 346 347 FUNC_TOKENS = { 348 TokenType.COLLATE, 349 TokenType.COMMAND, 350 TokenType.CURRENT_DATE, 351 TokenType.CURRENT_DATETIME, 352 TokenType.CURRENT_TIMESTAMP, 353 TokenType.CURRENT_TIME, 354 TokenType.CURRENT_USER, 355 TokenType.FILTER, 356 TokenType.FIRST, 357 TokenType.FORMAT, 358 TokenType.GLOB, 359 TokenType.IDENTIFIER, 360 TokenType.INDEX, 361 TokenType.ISNULL, 362 TokenType.ILIKE, 363 TokenType.INSERT, 364 TokenType.LIKE, 365 TokenType.MERGE, 366 TokenType.OFFSET, 367 TokenType.PRIMARY_KEY, 368 TokenType.RANGE, 369 TokenType.REPLACE, 370 TokenType.RLIKE, 371 TokenType.ROW, 372 TokenType.UNNEST, 373 TokenType.VAR, 374 TokenType.LEFT, 375 TokenType.RIGHT, 376 TokenType.DATE, 377 TokenType.DATETIME, 378 TokenType.TABLE, 379 TokenType.TIMESTAMP, 380 TokenType.TIMESTAMPTZ, 381 TokenType.WINDOW, 382 TokenType.XOR, 383 *TYPE_TOKENS, 384 *SUBQUERY_PREDICATES, 385 } 386 387 CONJUNCTION = { 388 TokenType.AND: exp.And, 389 TokenType.OR: exp.Or, 390 } 391 392 EQUALITY = { 393 TokenType.EQ: exp.EQ, 394 TokenType.NEQ: exp.NEQ, 395 TokenType.NULLSAFE_EQ: exp.NullSafeEQ, 396 } 397 398 COMPARISON = { 399 TokenType.GT: exp.GT, 400 
TokenType.GTE: exp.GTE, 401 TokenType.LT: exp.LT, 402 TokenType.LTE: exp.LTE, 403 } 404 405 BITWISE = { 406 TokenType.AMP: exp.BitwiseAnd, 407 TokenType.CARET: exp.BitwiseXor, 408 TokenType.PIPE: exp.BitwiseOr, 409 TokenType.DPIPE: exp.DPipe, 410 } 411 412 TERM = { 413 TokenType.DASH: exp.Sub, 414 TokenType.PLUS: exp.Add, 415 TokenType.MOD: exp.Mod, 416 TokenType.COLLATE: exp.Collate, 417 } 418 419 FACTOR = { 420 TokenType.DIV: exp.IntDiv, 421 TokenType.LR_ARROW: exp.Distance, 422 TokenType.SLASH: exp.Div, 423 TokenType.STAR: exp.Mul, 424 } 425 426 EXPONENT: t.Dict[TokenType, t.Type[exp.Expression]] = {} 427 428 TIMES = { 429 TokenType.TIME, 430 TokenType.TIMETZ, 431 } 432 433 TIMESTAMPS = { 434 TokenType.TIMESTAMP, 435 TokenType.TIMESTAMPTZ, 436 TokenType.TIMESTAMPLTZ, 437 *TIMES, 438 } 439 440 SET_OPERATIONS = { 441 TokenType.UNION, 442 TokenType.INTERSECT, 443 TokenType.EXCEPT, 444 } 445 446 JOIN_METHODS = { 447 TokenType.NATURAL, 448 TokenType.ASOF, 449 } 450 451 JOIN_SIDES = { 452 TokenType.LEFT, 453 TokenType.RIGHT, 454 TokenType.FULL, 455 } 456 457 JOIN_KINDS = { 458 TokenType.INNER, 459 TokenType.OUTER, 460 TokenType.CROSS, 461 TokenType.SEMI, 462 TokenType.ANTI, 463 } 464 465 JOIN_HINTS: t.Set[str] = set() 466 467 LAMBDAS = { 468 TokenType.ARROW: lambda self, expressions: self.expression( 469 exp.Lambda, 470 this=self._replace_lambda( 471 self._parse_conjunction(), 472 {node.name for node in expressions}, 473 ), 474 expressions=expressions, 475 ), 476 TokenType.FARROW: lambda self, expressions: self.expression( 477 exp.Kwarg, 478 this=exp.var(expressions[0].name), 479 expression=self._parse_conjunction(), 480 ), 481 } 482 483 COLUMN_OPERATORS = { 484 TokenType.DOT: None, 485 TokenType.DCOLON: lambda self, this, to: self.expression( 486 exp.Cast if self.STRICT_CAST else exp.TryCast, 487 this=this, 488 to=to, 489 ), 490 TokenType.ARROW: lambda self, this, path: self.expression( 491 exp.JSONExtract, 492 this=this, 493 expression=path, 494 ), 495 
TokenType.DARROW: lambda self, this, path: self.expression( 496 exp.JSONExtractScalar, 497 this=this, 498 expression=path, 499 ), 500 TokenType.HASH_ARROW: lambda self, this, path: self.expression( 501 exp.JSONBExtract, 502 this=this, 503 expression=path, 504 ), 505 TokenType.DHASH_ARROW: lambda self, this, path: self.expression( 506 exp.JSONBExtractScalar, 507 this=this, 508 expression=path, 509 ), 510 TokenType.PLACEHOLDER: lambda self, this, key: self.expression( 511 exp.JSONBContains, 512 this=this, 513 expression=key, 514 ), 515 } 516 517 EXPRESSION_PARSERS = { 518 exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 519 exp.Column: lambda self: self._parse_column(), 520 exp.Condition: lambda self: self._parse_conjunction(), 521 exp.DataType: lambda self: self._parse_types(allow_identifiers=False), 522 exp.Expression: lambda self: self._parse_statement(), 523 exp.From: lambda self: self._parse_from(), 524 exp.Group: lambda self: self._parse_group(), 525 exp.Having: lambda self: self._parse_having(), 526 exp.Identifier: lambda self: self._parse_id_var(), 527 exp.Join: lambda self: self._parse_join(), 528 exp.Lambda: lambda self: self._parse_lambda(), 529 exp.Lateral: lambda self: self._parse_lateral(), 530 exp.Limit: lambda self: self._parse_limit(), 531 exp.Offset: lambda self: self._parse_offset(), 532 exp.Order: lambda self: self._parse_order(), 533 exp.Ordered: lambda self: self._parse_ordered(), 534 exp.Properties: lambda self: self._parse_properties(), 535 exp.Qualify: lambda self: self._parse_qualify(), 536 exp.Returning: lambda self: self._parse_returning(), 537 exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY), 538 exp.Table: lambda self: self._parse_table_parts(), 539 exp.TableAlias: lambda self: self._parse_table_alias(), 540 exp.Where: lambda self: self._parse_where(), 541 exp.Window: lambda self: self._parse_named_window(), 542 exp.With: lambda self: self._parse_with(), 543 "JOIN_TYPE": lambda self: 
self._parse_join_parts(), 544 } 545 546 STATEMENT_PARSERS = { 547 TokenType.ALTER: lambda self: self._parse_alter(), 548 TokenType.BEGIN: lambda self: self._parse_transaction(), 549 TokenType.CACHE: lambda self: self._parse_cache(), 550 TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(), 551 TokenType.COMMENT: lambda self: self._parse_comment(), 552 TokenType.CREATE: lambda self: self._parse_create(), 553 TokenType.DELETE: lambda self: self._parse_delete(), 554 TokenType.DESC: lambda self: self._parse_describe(), 555 TokenType.DESCRIBE: lambda self: self._parse_describe(), 556 TokenType.DROP: lambda self: self._parse_drop(), 557 TokenType.INSERT: lambda self: self._parse_insert(), 558 TokenType.KILL: lambda self: self._parse_kill(), 559 TokenType.LOAD: lambda self: self._parse_load(), 560 TokenType.MERGE: lambda self: self._parse_merge(), 561 TokenType.PIVOT: lambda self: self._parse_simplified_pivot(), 562 TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()), 563 TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(), 564 TokenType.SET: lambda self: self._parse_set(), 565 TokenType.UNCACHE: lambda self: self._parse_uncache(), 566 TokenType.UPDATE: lambda self: self._parse_update(), 567 TokenType.USE: lambda self: self.expression( 568 exp.Use, 569 kind=self._match_texts(("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA")) 570 and exp.var(self._prev.text), 571 this=self._parse_table(schema=False), 572 ), 573 } 574 575 UNARY_PARSERS = { 576 TokenType.PLUS: lambda self: self._parse_unary(), # Unary + is handled as a no-op 577 TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()), 578 TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()), 579 TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()), 580 } 581 582 PRIMARY_PARSERS = { 583 TokenType.STRING: lambda self, token: self.expression( 584 exp.Literal, this=token.text, 
is_string=True 585 ), 586 TokenType.NUMBER: lambda self, token: self.expression( 587 exp.Literal, this=token.text, is_string=False 588 ), 589 TokenType.STAR: lambda self, _: self.expression( 590 exp.Star, **{"except": self._parse_except(), "replace": self._parse_replace()} 591 ), 592 TokenType.NULL: lambda self, _: self.expression(exp.Null), 593 TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True), 594 TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False), 595 TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text), 596 TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text), 597 TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text), 598 TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token), 599 TokenType.NATIONAL_STRING: lambda self, token: self.expression( 600 exp.National, this=token.text 601 ), 602 TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text), 603 TokenType.HEREDOC_STRING: lambda self, token: self.expression( 604 exp.RawString, this=token.text 605 ), 606 TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(), 607 } 608 609 PLACEHOLDER_PARSERS = { 610 TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder), 611 TokenType.PARAMETER: lambda self: self._parse_parameter(), 612 TokenType.COLON: lambda self: self.expression(exp.Placeholder, this=self._prev.text) 613 if self._match(TokenType.NUMBER) or self._match_set(self.ID_VAR_TOKENS) 614 else None, 615 } 616 617 RANGE_PARSERS = { 618 TokenType.BETWEEN: lambda self, this: self._parse_between(this), 619 TokenType.GLOB: binary_range_parser(exp.Glob), 620 TokenType.ILIKE: binary_range_parser(exp.ILike), 621 TokenType.IN: lambda self, this: self._parse_in(this), 622 TokenType.IRLIKE: binary_range_parser(exp.RegexpILike), 623 TokenType.IS: lambda self, this: self._parse_is(this), 
624 TokenType.LIKE: binary_range_parser(exp.Like), 625 TokenType.OVERLAPS: binary_range_parser(exp.Overlaps), 626 TokenType.RLIKE: binary_range_parser(exp.RegexpLike), 627 TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo), 628 TokenType.FOR: lambda self, this: self._parse_comprehension(this), 629 } 630 631 PROPERTY_PARSERS: t.Dict[str, t.Callable] = { 632 "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty), 633 "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty), 634 "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(), 635 "CHARSET": lambda self, **kwargs: self._parse_character_set(**kwargs), 636 "CHARACTER SET": lambda self, **kwargs: self._parse_character_set(**kwargs), 637 "CHECKSUM": lambda self: self._parse_checksum(), 638 "CLUSTER BY": lambda self: self._parse_cluster(), 639 "CLUSTERED": lambda self: self._parse_clustered_by(), 640 "COLLATE": lambda self, **kwargs: self._parse_property_assignment( 641 exp.CollateProperty, **kwargs 642 ), 643 "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty), 644 "COPY": lambda self: self._parse_copy_property(), 645 "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs), 646 "DEFINER": lambda self: self._parse_definer(), 647 "DETERMINISTIC": lambda self: self.expression( 648 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 649 ), 650 "DISTKEY": lambda self: self._parse_distkey(), 651 "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty), 652 "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty), 653 "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty), 654 "EXTERNAL": lambda self: self.expression(exp.ExternalProperty), 655 "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs), 656 "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 657 "FREESPACE": lambda self: 
self._parse_freespace(), 658 "HEAP": lambda self: self.expression(exp.HeapProperty), 659 "IMMUTABLE": lambda self: self.expression( 660 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 661 ), 662 "INPUT": lambda self: self.expression(exp.InputModelProperty, this=self._parse_schema()), 663 "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs), 664 "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty), 665 "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"), 666 "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"), 667 "LIKE": lambda self: self._parse_create_like(), 668 "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty), 669 "LOCK": lambda self: self._parse_locking(), 670 "LOCKING": lambda self: self._parse_locking(), 671 "LOG": lambda self, **kwargs: self._parse_log(**kwargs), 672 "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty), 673 "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs), 674 "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True), 675 "NO": lambda self: self._parse_no_property(), 676 "ON": lambda self: self._parse_on_property(), 677 "ORDER BY": lambda self: self._parse_order(skip_order_token=True), 678 "OUTPUT": lambda self: self.expression(exp.OutputModelProperty, this=self._parse_schema()), 679 "PARTITION": lambda self: self._parse_partitioned_of(), 680 "PARTITION BY": lambda self: self._parse_partitioned_by(), 681 "PARTITIONED BY": lambda self: self._parse_partitioned_by(), 682 "PARTITIONED_BY": lambda self: self._parse_partitioned_by(), 683 "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True), 684 "RANGE": lambda self: self._parse_dict_range(this="RANGE"), 685 "REMOTE": lambda self: self._parse_remote_with_connection(), 686 "RETURNS": lambda self: self._parse_returns(), 687 "ROW": lambda self: self._parse_row(), 688 "ROW_FORMAT": lambda self: 
self._parse_property_assignment(exp.RowFormatProperty), 689 "SAMPLE": lambda self: self.expression( 690 exp.SampleProperty, this=self._match_text_seq("BY") and self._parse_bitwise() 691 ), 692 "SET": lambda self: self.expression(exp.SetProperty, multi=False), 693 "SETTINGS": lambda self: self.expression( 694 exp.SettingsProperty, expressions=self._parse_csv(self._parse_set_item) 695 ), 696 "SORTKEY": lambda self: self._parse_sortkey(), 697 "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"), 698 "STABLE": lambda self: self.expression( 699 exp.StabilityProperty, this=exp.Literal.string("STABLE") 700 ), 701 "STORED": lambda self: self._parse_stored(), 702 "SYSTEM_VERSIONING": lambda self: self._parse_system_versioning_property(), 703 "TBLPROPERTIES": lambda self: self._parse_wrapped_csv(self._parse_property), 704 "TEMP": lambda self: self.expression(exp.TemporaryProperty), 705 "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty), 706 "TO": lambda self: self._parse_to_table(), 707 "TRANSIENT": lambda self: self.expression(exp.TransientProperty), 708 "TRANSFORM": lambda self: self.expression( 709 exp.TransformModelProperty, expressions=self._parse_wrapped_csv(self._parse_expression) 710 ), 711 "TTL": lambda self: self._parse_ttl(), 712 "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 713 "VOLATILE": lambda self: self._parse_volatile_property(), 714 "WITH": lambda self: self._parse_with_property(), 715 } 716 717 CONSTRAINT_PARSERS = { 718 "AUTOINCREMENT": lambda self: self._parse_auto_increment(), 719 "AUTO_INCREMENT": lambda self: self._parse_auto_increment(), 720 "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False), 721 "CHARACTER SET": lambda self: self.expression( 722 exp.CharacterSetColumnConstraint, this=self._parse_var_or_string() 723 ), 724 "CHECK": lambda self: self.expression( 725 exp.CheckColumnConstraint, this=self._parse_wrapped(self._parse_conjunction) 726 ), 727 
"COLLATE": lambda self: self.expression( 728 exp.CollateColumnConstraint, this=self._parse_var() 729 ), 730 "COMMENT": lambda self: self.expression( 731 exp.CommentColumnConstraint, this=self._parse_string() 732 ), 733 "COMPRESS": lambda self: self._parse_compress(), 734 "CLUSTERED": lambda self: self.expression( 735 exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 736 ), 737 "NONCLUSTERED": lambda self: self.expression( 738 exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 739 ), 740 "DEFAULT": lambda self: self.expression( 741 exp.DefaultColumnConstraint, this=self._parse_bitwise() 742 ), 743 "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()), 744 "FOREIGN KEY": lambda self: self._parse_foreign_key(), 745 "FORMAT": lambda self: self.expression( 746 exp.DateFormatColumnConstraint, this=self._parse_var_or_string() 747 ), 748 "GENERATED": lambda self: self._parse_generated_as_identity(), 749 "IDENTITY": lambda self: self._parse_auto_increment(), 750 "INLINE": lambda self: self._parse_inline(), 751 "LIKE": lambda self: self._parse_create_like(), 752 "NOT": lambda self: self._parse_not_constraint(), 753 "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True), 754 "ON": lambda self: ( 755 self._match(TokenType.UPDATE) 756 and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function()) 757 ) 758 or self.expression(exp.OnProperty, this=self._parse_id_var()), 759 "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()), 760 "PERIOD": lambda self: self._parse_period_for_system_time(), 761 "PRIMARY KEY": lambda self: self._parse_primary_key(), 762 "REFERENCES": lambda self: self._parse_references(match=False), 763 "TITLE": lambda self: self.expression( 764 exp.TitleColumnConstraint, this=self._parse_var_or_string() 765 ), 766 "TTL": lambda self: self.expression(exp.MergeTreeTTL, 
expressions=[self._parse_bitwise()]), 767 "UNIQUE": lambda self: self._parse_unique(), 768 "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint), 769 "WITH": lambda self: self.expression( 770 exp.Properties, expressions=self._parse_wrapped_csv(self._parse_property) 771 ), 772 } 773 774 ALTER_PARSERS = { 775 "ADD": lambda self: self._parse_alter_table_add(), 776 "ALTER": lambda self: self._parse_alter_table_alter(), 777 "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()), 778 "DROP": lambda self: self._parse_alter_table_drop(), 779 "RENAME": lambda self: self._parse_alter_table_rename(), 780 } 781 782 SCHEMA_UNNAMED_CONSTRAINTS = {"CHECK", "FOREIGN KEY", "LIKE", "PRIMARY KEY", "UNIQUE", "PERIOD"} 783 784 NO_PAREN_FUNCTION_PARSERS = { 785 "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()), 786 "CASE": lambda self: self._parse_case(), 787 "IF": lambda self: self._parse_if(), 788 "NEXT": lambda self: self._parse_next_value_for(), 789 } 790 791 INVALID_FUNC_NAME_TOKENS = { 792 TokenType.IDENTIFIER, 793 TokenType.STRING, 794 } 795 796 FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"} 797 798 FUNCTION_PARSERS = { 799 "ANY_VALUE": lambda self: self._parse_any_value(), 800 "CAST": lambda self: self._parse_cast(self.STRICT_CAST), 801 "CONCAT": lambda self: self._parse_concat(), 802 "CONCAT_WS": lambda self: self._parse_concat_ws(), 803 "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST), 804 "DECODE": lambda self: self._parse_decode(), 805 "EXTRACT": lambda self: self._parse_extract(), 806 "JSON_OBJECT": lambda self: self._parse_json_object(), 807 "JSON_TABLE": lambda self: self._parse_json_table(), 808 "LOG": lambda self: self._parse_logarithm(), 809 "MATCH": lambda self: self._parse_match_against(), 810 "OPENJSON": lambda self: self._parse_open_json(), 811 "POSITION": lambda self: self._parse_position(), 812 "PREDICT": lambda self: self._parse_predict(), 813 "SAFE_CAST": lambda self: 
self._parse_cast(False, safe=True), 814 "STRING_AGG": lambda self: self._parse_string_agg(), 815 "SUBSTRING": lambda self: self._parse_substring(), 816 "TRIM": lambda self: self._parse_trim(), 817 "TRY_CAST": lambda self: self._parse_cast(False, safe=True), 818 "TRY_CONVERT": lambda self: self._parse_convert(False, safe=True), 819 } 820 821 QUERY_MODIFIER_PARSERS = { 822 TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()), 823 TokenType.WHERE: lambda self: ("where", self._parse_where()), 824 TokenType.GROUP_BY: lambda self: ("group", self._parse_group()), 825 TokenType.HAVING: lambda self: ("having", self._parse_having()), 826 TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()), 827 TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()), 828 TokenType.ORDER_BY: lambda self: ("order", self._parse_order()), 829 TokenType.LIMIT: lambda self: ("limit", self._parse_limit()), 830 TokenType.FETCH: lambda self: ("limit", self._parse_limit()), 831 TokenType.OFFSET: lambda self: ("offset", self._parse_offset()), 832 TokenType.FOR: lambda self: ("locks", self._parse_locks()), 833 TokenType.LOCK: lambda self: ("locks", self._parse_locks()), 834 TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 835 TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 836 TokenType.CLUSTER_BY: lambda self: ( 837 "cluster", 838 self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 839 ), 840 TokenType.DISTRIBUTE_BY: lambda self: ( 841 "distribute", 842 self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY), 843 ), 844 TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)), 845 TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)), 846 TokenType.START_WITH: lambda self: ("connect", self._parse_connect()), 847 } 848 849 SET_PARSERS = { 850 "GLOBAL": lambda self: 
self._parse_set_item_assignment("GLOBAL"), 851 "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"), 852 "SESSION": lambda self: self._parse_set_item_assignment("SESSION"), 853 "TRANSACTION": lambda self: self._parse_set_transaction(), 854 } 855 856 SHOW_PARSERS: t.Dict[str, t.Callable] = {} 857 858 TYPE_LITERAL_PARSERS = { 859 exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this), 860 } 861 862 MODIFIABLES = (exp.Subquery, exp.Subqueryable, exp.Table) 863 864 DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN} 865 866 PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE} 867 868 TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"} 869 TRANSACTION_CHARACTERISTICS = { 870 "ISOLATION LEVEL REPEATABLE READ", 871 "ISOLATION LEVEL READ COMMITTED", 872 "ISOLATION LEVEL READ UNCOMMITTED", 873 "ISOLATION LEVEL SERIALIZABLE", 874 "READ WRITE", 875 "READ ONLY", 876 } 877 878 INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"} 879 880 CLONE_KEYWORDS = {"CLONE", "COPY"} 881 CLONE_KINDS = {"TIMESTAMP", "OFFSET", "STATEMENT"} 882 883 OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS"} 884 OPTYPE_FOLLOW_TOKENS = {TokenType.COMMA, TokenType.R_PAREN} 885 886 TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE} 887 888 WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS} 889 WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER} 890 WINDOW_SIDES = {"FOLLOWING", "PRECEDING"} 891 892 FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT} 893 894 ADD_CONSTRAINT_TOKENS = {TokenType.CONSTRAINT, TokenType.PRIMARY_KEY, TokenType.FOREIGN_KEY} 895 896 DISTINCT_TOKENS = {TokenType.DISTINCT} 897 898 NULL_TOKENS = {TokenType.NULL} 899 900 UNNEST_OFFSET_ALIAS_TOKENS = ID_VAR_TOKENS - SET_OPERATIONS 901 902 STRICT_CAST = True 903 904 # A NULL arg in CONCAT yields NULL by default 905 CONCAT_NULL_OUTPUTS_STRING = False 906 907 
PREFIXED_PIVOT_COLUMNS = False 908 IDENTIFY_PIVOT_STRINGS = False 909 910 LOG_BASE_FIRST = True 911 LOG_DEFAULTS_TO_LN = False 912 913 # Whether or not ADD is present for each column added by ALTER TABLE 914 ALTER_TABLE_ADD_COLUMN_KEYWORD = True 915 916 # Whether or not the table sample clause expects CSV syntax 917 TABLESAMPLE_CSV = False 918 919 # Whether or not the SET command needs a delimiter (e.g. "=") for assignments 920 SET_REQUIRES_ASSIGNMENT_DELIMITER = True 921 922 # Whether the TRIM function expects the characters to trim as its first argument 923 TRIM_PATTERN_FIRST = False 924 925 __slots__ = ( 926 "error_level", 927 "error_message_context", 928 "max_errors", 929 "sql", 930 "errors", 931 "_tokens", 932 "_index", 933 "_curr", 934 "_next", 935 "_prev", 936 "_prev_comments", 937 "_tokenizer", 938 ) 939 940 # Autofilled 941 TOKENIZER_CLASS: t.Type[Tokenizer] = Tokenizer 942 INDEX_OFFSET: int = 0 943 UNNEST_COLUMN_ONLY: bool = False 944 ALIAS_POST_TABLESAMPLE: bool = False 945 STRICT_STRING_CONCAT = False 946 SUPPORTS_USER_DEFINED_TYPES = True 947 NORMALIZE_FUNCTIONS = "upper" 948 NULL_ORDERING: str = "nulls_are_small" 949 SHOW_TRIE: t.Dict = {} 950 SET_TRIE: t.Dict = {} 951 FORMAT_MAPPING: t.Dict[str, str] = {} 952 FORMAT_TRIE: t.Dict = {} 953 TIME_MAPPING: t.Dict[str, str] = {} 954 TIME_TRIE: t.Dict = {} 955 956 def __init__( 957 self, 958 error_level: t.Optional[ErrorLevel] = None, 959 error_message_context: int = 100, 960 max_errors: int = 3, 961 ): 962 self.error_level = error_level or ErrorLevel.IMMEDIATE 963 self.error_message_context = error_message_context 964 self.max_errors = max_errors 965 self._tokenizer = self.TOKENIZER_CLASS() 966 self.reset() 967 968 def reset(self): 969 self.sql = "" 970 self.errors = [] 971 self._tokens = [] 972 self._index = 0 973 self._curr = None 974 self._next = None 975 self._prev = None 976 self._prev_comments = None 977 978 def parse( 979 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 980 ) -> 
t.List[t.Optional[exp.Expression]]: 981 """ 982 Parses a list of tokens and returns a list of syntax trees, one tree 983 per parsed SQL statement. 984 985 Args: 986 raw_tokens: The list of tokens. 987 sql: The original SQL string, used to produce helpful debug messages. 988 989 Returns: 990 The list of the produced syntax trees. 991 """ 992 return self._parse( 993 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 994 ) 995 996 def parse_into( 997 self, 998 expression_types: exp.IntoType, 999 raw_tokens: t.List[Token], 1000 sql: t.Optional[str] = None, 1001 ) -> t.List[t.Optional[exp.Expression]]: 1002 """ 1003 Parses a list of tokens into a given Expression type. If a collection of Expression 1004 types is given instead, this method will try to parse the token list into each one 1005 of them, stopping at the first for which the parsing succeeds. 1006 1007 Args: 1008 expression_types: The expression type(s) to try and parse the token list into. 1009 raw_tokens: The list of tokens. 1010 sql: The original SQL string, used to produce helpful debug messages. 1011 1012 Returns: 1013 The target Expression. 
1014 """ 1015 errors = [] 1016 for expression_type in ensure_list(expression_types): 1017 parser = self.EXPRESSION_PARSERS.get(expression_type) 1018 if not parser: 1019 raise TypeError(f"No parser registered for {expression_type}") 1020 1021 try: 1022 return self._parse(parser, raw_tokens, sql) 1023 except ParseError as e: 1024 e.errors[0]["into_expression"] = expression_type 1025 errors.append(e) 1026 1027 raise ParseError( 1028 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 1029 errors=merge_errors(errors), 1030 ) from errors[-1] 1031 1032 def _parse( 1033 self, 1034 parse_method: t.Callable[[Parser], t.Optional[exp.Expression]], 1035 raw_tokens: t.List[Token], 1036 sql: t.Optional[str] = None, 1037 ) -> t.List[t.Optional[exp.Expression]]: 1038 self.reset() 1039 self.sql = sql or "" 1040 1041 total = len(raw_tokens) 1042 chunks: t.List[t.List[Token]] = [[]] 1043 1044 for i, token in enumerate(raw_tokens): 1045 if token.token_type == TokenType.SEMICOLON: 1046 if i < total - 1: 1047 chunks.append([]) 1048 else: 1049 chunks[-1].append(token) 1050 1051 expressions = [] 1052 1053 for tokens in chunks: 1054 self._index = -1 1055 self._tokens = tokens 1056 self._advance() 1057 1058 expressions.append(parse_method(self)) 1059 1060 if self._index < len(self._tokens): 1061 self.raise_error("Invalid expression / Unexpected token") 1062 1063 self.check_errors() 1064 1065 return expressions 1066 1067 def check_errors(self) -> None: 1068 """Logs or raises any found errors, depending on the chosen error level setting.""" 1069 if self.error_level == ErrorLevel.WARN: 1070 for error in self.errors: 1071 logger.error(str(error)) 1072 elif self.error_level == ErrorLevel.RAISE and self.errors: 1073 raise ParseError( 1074 concat_messages(self.errors, self.max_errors), 1075 errors=merge_errors(self.errors), 1076 ) 1077 1078 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 1079 """ 1080 Appends an error in the list of recorded errors or 
raises it, depending on the chosen 1081 error level setting. 1082 """ 1083 token = token or self._curr or self._prev or Token.string("") 1084 start = token.start 1085 end = token.end + 1 1086 start_context = self.sql[max(start - self.error_message_context, 0) : start] 1087 highlight = self.sql[start:end] 1088 end_context = self.sql[end : end + self.error_message_context] 1089 1090 error = ParseError.new( 1091 f"{message}. Line {token.line}, Col: {token.col}.\n" 1092 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 1093 description=message, 1094 line=token.line, 1095 col=token.col, 1096 start_context=start_context, 1097 highlight=highlight, 1098 end_context=end_context, 1099 ) 1100 1101 if self.error_level == ErrorLevel.IMMEDIATE: 1102 raise error 1103 1104 self.errors.append(error) 1105 1106 def expression( 1107 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 1108 ) -> E: 1109 """ 1110 Creates a new, validated Expression. 1111 1112 Args: 1113 exp_class: The expression class to instantiate. 1114 comments: An optional list of comments to attach to the expression. 1115 kwargs: The arguments to set for the expression along with their respective values. 1116 1117 Returns: 1118 The target expression. 1119 """ 1120 instance = exp_class(**kwargs) 1121 instance.add_comments(comments) if comments else self._add_comments(instance) 1122 return self.validate_expression(instance) 1123 1124 def _add_comments(self, expression: t.Optional[exp.Expression]) -> None: 1125 if expression and self._prev_comments: 1126 expression.add_comments(self._prev_comments) 1127 self._prev_comments = None 1128 1129 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1130 """ 1131 Validates an Expression, making sure that all its mandatory arguments are set. 1132 1133 Args: 1134 expression: The expression to validate. 1135 args: An optional list of items that was used to instantiate the expression, if it's a Func. 
1136 1137 Returns: 1138 The validated expression. 1139 """ 1140 if self.error_level != ErrorLevel.IGNORE: 1141 for error_message in expression.error_messages(args): 1142 self.raise_error(error_message) 1143 1144 return expression 1145 1146 def _find_sql(self, start: Token, end: Token) -> str: 1147 return self.sql[start.start : end.end + 1] 1148 1149 def _advance(self, times: int = 1) -> None: 1150 self._index += times 1151 self._curr = seq_get(self._tokens, self._index) 1152 self._next = seq_get(self._tokens, self._index + 1) 1153 1154 if self._index > 0: 1155 self._prev = self._tokens[self._index - 1] 1156 self._prev_comments = self._prev.comments 1157 else: 1158 self._prev = None 1159 self._prev_comments = None 1160 1161 def _retreat(self, index: int) -> None: 1162 if index != self._index: 1163 self._advance(index - self._index) 1164 1165 def _parse_command(self) -> exp.Command: 1166 return self.expression(exp.Command, this=self._prev.text, expression=self._parse_string()) 1167 1168 def _parse_comment(self, allow_exists: bool = True) -> exp.Expression: 1169 start = self._prev 1170 exists = self._parse_exists() if allow_exists else None 1171 1172 self._match(TokenType.ON) 1173 1174 kind = self._match_set(self.CREATABLES) and self._prev 1175 if not kind: 1176 return self._parse_as_command(start) 1177 1178 if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1179 this = self._parse_user_defined_function(kind=kind.token_type) 1180 elif kind.token_type == TokenType.TABLE: 1181 this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS) 1182 elif kind.token_type == TokenType.COLUMN: 1183 this = self._parse_column() 1184 else: 1185 this = self._parse_id_var() 1186 1187 self._match(TokenType.IS) 1188 1189 return self.expression( 1190 exp.Comment, this=this, kind=kind.text, expression=self._parse_string(), exists=exists 1191 ) 1192 1193 def _parse_to_table( 1194 self, 1195 ) -> exp.ToTableProperty: 1196 table = self._parse_table_parts(schema=True) 
1197 return self.expression(exp.ToTableProperty, this=table) 1198 1199 # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl 1200 def _parse_ttl(self) -> exp.Expression: 1201 def _parse_ttl_action() -> t.Optional[exp.Expression]: 1202 this = self._parse_bitwise() 1203 1204 if self._match_text_seq("DELETE"): 1205 return self.expression(exp.MergeTreeTTLAction, this=this, delete=True) 1206 if self._match_text_seq("RECOMPRESS"): 1207 return self.expression( 1208 exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise() 1209 ) 1210 if self._match_text_seq("TO", "DISK"): 1211 return self.expression( 1212 exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string() 1213 ) 1214 if self._match_text_seq("TO", "VOLUME"): 1215 return self.expression( 1216 exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string() 1217 ) 1218 1219 return this 1220 1221 expressions = self._parse_csv(_parse_ttl_action) 1222 where = self._parse_where() 1223 group = self._parse_group() 1224 1225 aggregates = None 1226 if group and self._match(TokenType.SET): 1227 aggregates = self._parse_csv(self._parse_set_item) 1228 1229 return self.expression( 1230 exp.MergeTreeTTL, 1231 expressions=expressions, 1232 where=where, 1233 group=group, 1234 aggregates=aggregates, 1235 ) 1236 1237 def _parse_statement(self) -> t.Optional[exp.Expression]: 1238 if self._curr is None: 1239 return None 1240 1241 if self._match_set(self.STATEMENT_PARSERS): 1242 return self.STATEMENT_PARSERS[self._prev.token_type](self) 1243 1244 if self._match_set(Tokenizer.COMMANDS): 1245 return self._parse_command() 1246 1247 expression = self._parse_expression() 1248 expression = self._parse_set_operations(expression) if expression else self._parse_select() 1249 return self._parse_query_modifiers(expression) 1250 1251 def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command: 1252 start = self._prev 1253 temporary = self._match(TokenType.TEMPORARY) 1254 
materialized = self._match_text_seq("MATERIALIZED") 1255 1256 kind = self._match_set(self.CREATABLES) and self._prev.text 1257 if not kind: 1258 return self._parse_as_command(start) 1259 1260 return self.expression( 1261 exp.Drop, 1262 comments=start.comments, 1263 exists=exists or self._parse_exists(), 1264 this=self._parse_table(schema=True), 1265 kind=kind, 1266 temporary=temporary, 1267 materialized=materialized, 1268 cascade=self._match_text_seq("CASCADE"), 1269 constraints=self._match_text_seq("CONSTRAINTS"), 1270 purge=self._match_text_seq("PURGE"), 1271 ) 1272 1273 def _parse_exists(self, not_: bool = False) -> t.Optional[bool]: 1274 return ( 1275 self._match_text_seq("IF") 1276 and (not not_ or self._match(TokenType.NOT)) 1277 and self._match(TokenType.EXISTS) 1278 ) 1279 1280 def _parse_create(self) -> exp.Create | exp.Command: 1281 # Note: this can't be None because we've matched a statement parser 1282 start = self._prev 1283 comments = self._prev_comments 1284 1285 replace = start.text.upper() == "REPLACE" or self._match_pair( 1286 TokenType.OR, TokenType.REPLACE 1287 ) 1288 unique = self._match(TokenType.UNIQUE) 1289 1290 if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False): 1291 self._advance() 1292 1293 properties = None 1294 create_token = self._match_set(self.CREATABLES) and self._prev 1295 1296 if not create_token: 1297 # exp.Properties.Location.POST_CREATE 1298 properties = self._parse_properties() 1299 create_token = self._match_set(self.CREATABLES) and self._prev 1300 1301 if not properties or not create_token: 1302 return self._parse_as_command(start) 1303 1304 exists = self._parse_exists(not_=True) 1305 this = None 1306 expression: t.Optional[exp.Expression] = None 1307 indexes = None 1308 no_schema_binding = None 1309 begin = None 1310 end = None 1311 clone = None 1312 1313 def extend_props(temp_props: t.Optional[exp.Properties]) -> None: 1314 nonlocal properties 1315 if properties and temp_props: 1316 
    def _parse_create(self) -> exp.Create | exp.Command:
        """
        Parse a CREATE statement into an exp.Create node, falling back to a raw
        exp.Command when the created object kind cannot be recognized.
        """
        # Note: this can't be None because we've matched a statement parser
        start = self._prev
        comments = self._prev_comments

        replace = start.text.upper() == "REPLACE" or self._match_pair(
            TokenType.OR, TokenType.REPLACE
        )
        unique = self._match(TokenType.UNIQUE)

        # CREATE TABLE FUNCTION: skip the TABLE token so FUNCTION becomes the kind.
        if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False):
            self._advance()

        properties = None
        create_token = self._match_set(self.CREATABLES) and self._prev

        if not create_token:
            # exp.Properties.Location.POST_CREATE
            properties = self._parse_properties()
            create_token = self._match_set(self.CREATABLES) and self._prev

        if not properties or not create_token:
            return self._parse_as_command(start)

        exists = self._parse_exists(not_=True)
        this = None
        expression: t.Optional[exp.Expression] = None
        indexes = None
        no_schema_binding = None
        begin = None
        end = None
        clone = None

        # Properties can appear at several locations; merge them all into one node.
        def extend_props(temp_props: t.Optional[exp.Properties]) -> None:
            nonlocal properties
            if properties and temp_props:
                properties.expressions.extend(temp_props.expressions)
            elif temp_props:
                properties = temp_props

        if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=create_token.token_type)

            # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature)
            extend_props(self._parse_properties())

            self._match(TokenType.ALIAS)

            if self._match(TokenType.COMMAND):
                expression = self._parse_as_command(self._prev)
            else:
                begin = self._match(TokenType.BEGIN)
                return_ = self._match_text_seq("RETURN")

                if self._match(TokenType.STRING, advance=False):
                    # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property
                    # # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement
                    expression = self._parse_string()
                    extend_props(self._parse_properties())
                else:
                    expression = self._parse_statement()

                end = self._match_text_seq("END")

                if return_:
                    expression = self.expression(exp.Return, this=expression)
        elif create_token.token_type == TokenType.INDEX:
            this = self._parse_index(index=self._parse_id_var())
        elif create_token.token_type in self.DB_CREATABLES:
            table_parts = self._parse_table_parts(schema=True)

            # exp.Properties.Location.POST_NAME
            self._match(TokenType.COMMA)
            extend_props(self._parse_properties(before=True))

            this = self._parse_schema(this=table_parts)

            # exp.Properties.Location.POST_SCHEMA and POST_WITH
            extend_props(self._parse_properties())

            self._match(TokenType.ALIAS)
            if not self._match_set(self.DDL_SELECT_TOKENS, advance=False):
                # exp.Properties.Location.POST_ALIAS
                extend_props(self._parse_properties())

            expression = self._parse_ddl_select()

            if create_token.token_type == TokenType.TABLE:
                # exp.Properties.Location.POST_EXPRESSION
                extend_props(self._parse_properties())

                indexes = []
                while True:
                    index = self._parse_index()

                    # exp.Properties.Location.POST_INDEX
                    extend_props(self._parse_properties())

                    if not index:
                        break
                    else:
                        self._match(TokenType.COMMA)
                        indexes.append(index)
            elif create_token.token_type == TokenType.VIEW:
                if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"):
                    no_schema_binding = True

        shallow = self._match_text_seq("SHALLOW")

        # Snowflake-style CLONE/COPY clause, optionally pinned to a point in time.
        if self._match_texts(self.CLONE_KEYWORDS):
            copy = self._prev.text.lower() == "copy"
            clone = self._parse_table(schema=True)
            when = self._match_texts({"AT", "BEFORE"}) and self._prev.text.upper()
            clone_kind = (
                self._match(TokenType.L_PAREN)
                and self._match_texts(self.CLONE_KINDS)
                and self._prev.text.upper()
            )
            clone_expression = self._match(TokenType.FARROW) and self._parse_bitwise()
            self._match(TokenType.R_PAREN)
            clone = self.expression(
                exp.Clone,
                this=clone,
                when=when,
                kind=clone_kind,
                shallow=shallow,
                expression=clone_expression,
                copy=copy,
            )

        return self.expression(
            exp.Create,
            comments=comments,
            this=this,
            kind=create_token.text,
            replace=replace,
            unique=unique,
            expression=expression,
            exists=exists,
            properties=properties,
            indexes=indexes,
            no_schema_binding=no_schema_binding,
            begin=begin,
            end=end,
            clone=clone,
        )
(self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"), 1438 "after": self._match_text_seq("AFTER"), 1439 "minimum": self._match_texts(("MIN", "MINIMUM")), 1440 "maximum": self._match_texts(("MAX", "MAXIMUM")), 1441 } 1442 1443 if self._match_texts(self.PROPERTY_PARSERS): 1444 parser = self.PROPERTY_PARSERS[self._prev.text.upper()] 1445 try: 1446 return parser(self, **{k: v for k, v in kwargs.items() if v}) 1447 except TypeError: 1448 self.raise_error(f"Cannot parse property '{self._prev.text}'") 1449 1450 return None 1451 1452 def _parse_property(self) -> t.Optional[exp.Expression]: 1453 if self._match_texts(self.PROPERTY_PARSERS): 1454 return self.PROPERTY_PARSERS[self._prev.text.upper()](self) 1455 1456 if self._match(TokenType.DEFAULT) and self._match_texts(self.PROPERTY_PARSERS): 1457 return self.PROPERTY_PARSERS[self._prev.text.upper()](self, default=True) 1458 1459 if self._match_text_seq("COMPOUND", "SORTKEY"): 1460 return self._parse_sortkey(compound=True) 1461 1462 if self._match_text_seq("SQL", "SECURITY"): 1463 return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER")) 1464 1465 index = self._index 1466 key = self._parse_column() 1467 1468 if not self._match(TokenType.EQ): 1469 self._retreat(index) 1470 return None 1471 1472 return self.expression( 1473 exp.Property, 1474 this=key.to_dot() if isinstance(key, exp.Column) else key, 1475 value=self._parse_column() or self._parse_var(any_token=True), 1476 ) 1477 1478 def _parse_stored(self) -> exp.FileFormatProperty: 1479 self._match(TokenType.ALIAS) 1480 1481 input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None 1482 output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None 1483 1484 return self.expression( 1485 exp.FileFormatProperty, 1486 this=self.expression( 1487 exp.InputOutputFormat, input_format=input_format, output_format=output_format 1488 ) 1489 if input_format or output_format 1490 else 
self._parse_var_or_string() or self._parse_number() or self._parse_id_var(), 1491 ) 1492 1493 def _parse_property_assignment(self, exp_class: t.Type[E], **kwargs: t.Any) -> E: 1494 self._match(TokenType.EQ) 1495 self._match(TokenType.ALIAS) 1496 return self.expression(exp_class, this=self._parse_field(), **kwargs) 1497 1498 def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]: 1499 properties = [] 1500 while True: 1501 if before: 1502 prop = self._parse_property_before() 1503 else: 1504 prop = self._parse_property() 1505 1506 if not prop: 1507 break 1508 for p in ensure_list(prop): 1509 properties.append(p) 1510 1511 if properties: 1512 return self.expression(exp.Properties, expressions=properties) 1513 1514 return None 1515 1516 def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty: 1517 return self.expression( 1518 exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION") 1519 ) 1520 1521 def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty: 1522 if self._index >= 2: 1523 pre_volatile_token = self._tokens[self._index - 2] 1524 else: 1525 pre_volatile_token = None 1526 1527 if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS: 1528 return exp.VolatileProperty() 1529 1530 return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE")) 1531 1532 def _parse_system_versioning_property(self) -> exp.WithSystemVersioningProperty: 1533 self._match_pair(TokenType.EQ, TokenType.ON) 1534 1535 prop = self.expression(exp.WithSystemVersioningProperty) 1536 if self._match(TokenType.L_PAREN): 1537 self._match_text_seq("HISTORY_TABLE", "=") 1538 prop.set("this", self._parse_table_parts()) 1539 1540 if self._match(TokenType.COMMA): 1541 self._match_text_seq("DATA_CONSISTENCY_CHECK", "=") 1542 prop.set("expression", self._advance_any() and self._prev.text.upper()) 1543 1544 self._match_r_paren() 1545 1546 return prop 1547 1548 def 
_parse_with_property( 1549 self, 1550 ) -> t.Optional[exp.Expression] | t.List[exp.Expression]: 1551 if self._match(TokenType.L_PAREN, advance=False): 1552 return self._parse_wrapped_csv(self._parse_property) 1553 1554 if self._match_text_seq("JOURNAL"): 1555 return self._parse_withjournaltable() 1556 1557 if self._match_text_seq("DATA"): 1558 return self._parse_withdata(no=False) 1559 elif self._match_text_seq("NO", "DATA"): 1560 return self._parse_withdata(no=True) 1561 1562 if not self._next: 1563 return None 1564 1565 return self._parse_withisolatedloading() 1566 1567 # https://dev.mysql.com/doc/refman/8.0/en/create-view.html 1568 def _parse_definer(self) -> t.Optional[exp.DefinerProperty]: 1569 self._match(TokenType.EQ) 1570 1571 user = self._parse_id_var() 1572 self._match(TokenType.PARAMETER) 1573 host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text) 1574 1575 if not user or not host: 1576 return None 1577 1578 return exp.DefinerProperty(this=f"{user}@{host}") 1579 1580 def _parse_withjournaltable(self) -> exp.WithJournalTableProperty: 1581 self._match(TokenType.TABLE) 1582 self._match(TokenType.EQ) 1583 return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts()) 1584 1585 def _parse_log(self, no: bool = False) -> exp.LogProperty: 1586 return self.expression(exp.LogProperty, no=no) 1587 1588 def _parse_journal(self, **kwargs) -> exp.JournalProperty: 1589 return self.expression(exp.JournalProperty, **kwargs) 1590 1591 def _parse_checksum(self) -> exp.ChecksumProperty: 1592 self._match(TokenType.EQ) 1593 1594 on = None 1595 if self._match(TokenType.ON): 1596 on = True 1597 elif self._match_text_seq("OFF"): 1598 on = False 1599 1600 return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT)) 1601 1602 def _parse_cluster(self) -> exp.Cluster: 1603 return self.expression(exp.Cluster, expressions=self._parse_csv(self._parse_ordered)) 1604 1605 def _parse_clustered_by(self) -> 
exp.ClusteredByProperty: 1606 self._match_text_seq("BY") 1607 1608 self._match_l_paren() 1609 expressions = self._parse_csv(self._parse_column) 1610 self._match_r_paren() 1611 1612 if self._match_text_seq("SORTED", "BY"): 1613 self._match_l_paren() 1614 sorted_by = self._parse_csv(self._parse_ordered) 1615 self._match_r_paren() 1616 else: 1617 sorted_by = None 1618 1619 self._match(TokenType.INTO) 1620 buckets = self._parse_number() 1621 self._match_text_seq("BUCKETS") 1622 1623 return self.expression( 1624 exp.ClusteredByProperty, 1625 expressions=expressions, 1626 sorted_by=sorted_by, 1627 buckets=buckets, 1628 ) 1629 1630 def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]: 1631 if not self._match_text_seq("GRANTS"): 1632 self._retreat(self._index - 1) 1633 return None 1634 1635 return self.expression(exp.CopyGrantsProperty) 1636 1637 def _parse_freespace(self) -> exp.FreespaceProperty: 1638 self._match(TokenType.EQ) 1639 return self.expression( 1640 exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT) 1641 ) 1642 1643 def _parse_mergeblockratio( 1644 self, no: bool = False, default: bool = False 1645 ) -> exp.MergeBlockRatioProperty: 1646 if self._match(TokenType.EQ): 1647 return self.expression( 1648 exp.MergeBlockRatioProperty, 1649 this=self._parse_number(), 1650 percent=self._match(TokenType.PERCENT), 1651 ) 1652 1653 return self.expression(exp.MergeBlockRatioProperty, no=no, default=default) 1654 1655 def _parse_datablocksize( 1656 self, 1657 default: t.Optional[bool] = None, 1658 minimum: t.Optional[bool] = None, 1659 maximum: t.Optional[bool] = None, 1660 ) -> exp.DataBlocksizeProperty: 1661 self._match(TokenType.EQ) 1662 size = self._parse_number() 1663 1664 units = None 1665 if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")): 1666 units = self._prev.text 1667 1668 return self.expression( 1669 exp.DataBlocksizeProperty, 1670 size=size, 1671 units=units, 1672 default=default, 1673 minimum=minimum, 
1674 maximum=maximum, 1675 ) 1676 1677 def _parse_blockcompression(self) -> exp.BlockCompressionProperty: 1678 self._match(TokenType.EQ) 1679 always = self._match_text_seq("ALWAYS") 1680 manual = self._match_text_seq("MANUAL") 1681 never = self._match_text_seq("NEVER") 1682 default = self._match_text_seq("DEFAULT") 1683 1684 autotemp = None 1685 if self._match_text_seq("AUTOTEMP"): 1686 autotemp = self._parse_schema() 1687 1688 return self.expression( 1689 exp.BlockCompressionProperty, 1690 always=always, 1691 manual=manual, 1692 never=never, 1693 default=default, 1694 autotemp=autotemp, 1695 ) 1696 1697 def _parse_withisolatedloading(self) -> exp.IsolatedLoadingProperty: 1698 no = self._match_text_seq("NO") 1699 concurrent = self._match_text_seq("CONCURRENT") 1700 self._match_text_seq("ISOLATED", "LOADING") 1701 for_all = self._match_text_seq("FOR", "ALL") 1702 for_insert = self._match_text_seq("FOR", "INSERT") 1703 for_none = self._match_text_seq("FOR", "NONE") 1704 return self.expression( 1705 exp.IsolatedLoadingProperty, 1706 no=no, 1707 concurrent=concurrent, 1708 for_all=for_all, 1709 for_insert=for_insert, 1710 for_none=for_none, 1711 ) 1712 1713 def _parse_locking(self) -> exp.LockingProperty: 1714 if self._match(TokenType.TABLE): 1715 kind = "TABLE" 1716 elif self._match(TokenType.VIEW): 1717 kind = "VIEW" 1718 elif self._match(TokenType.ROW): 1719 kind = "ROW" 1720 elif self._match_text_seq("DATABASE"): 1721 kind = "DATABASE" 1722 else: 1723 kind = None 1724 1725 if kind in ("DATABASE", "TABLE", "VIEW"): 1726 this = self._parse_table_parts() 1727 else: 1728 this = None 1729 1730 if self._match(TokenType.FOR): 1731 for_or_in = "FOR" 1732 elif self._match(TokenType.IN): 1733 for_or_in = "IN" 1734 else: 1735 for_or_in = None 1736 1737 if self._match_text_seq("ACCESS"): 1738 lock_type = "ACCESS" 1739 elif self._match_texts(("EXCL", "EXCLUSIVE")): 1740 lock_type = "EXCLUSIVE" 1741 elif self._match_text_seq("SHARE"): 1742 lock_type = "SHARE" 1743 elif 
self._match_text_seq("READ"): 1744 lock_type = "READ" 1745 elif self._match_text_seq("WRITE"): 1746 lock_type = "WRITE" 1747 elif self._match_text_seq("CHECKSUM"): 1748 lock_type = "CHECKSUM" 1749 else: 1750 lock_type = None 1751 1752 override = self._match_text_seq("OVERRIDE") 1753 1754 return self.expression( 1755 exp.LockingProperty, 1756 this=this, 1757 kind=kind, 1758 for_or_in=for_or_in, 1759 lock_type=lock_type, 1760 override=override, 1761 ) 1762 1763 def _parse_partition_by(self) -> t.List[exp.Expression]: 1764 if self._match(TokenType.PARTITION_BY): 1765 return self._parse_csv(self._parse_conjunction) 1766 return [] 1767 1768 def _parse_partition_bound_spec(self) -> exp.PartitionBoundSpec: 1769 def _parse_partition_bound_expr() -> t.Optional[exp.Expression]: 1770 if self._match_text_seq("MINVALUE"): 1771 return exp.var("MINVALUE") 1772 if self._match_text_seq("MAXVALUE"): 1773 return exp.var("MAXVALUE") 1774 return self._parse_bitwise() 1775 1776 this: t.Optional[exp.Expression | t.List[exp.Expression]] = None 1777 expression = None 1778 from_expressions = None 1779 to_expressions = None 1780 1781 if self._match(TokenType.IN): 1782 this = self._parse_wrapped_csv(self._parse_bitwise) 1783 elif self._match(TokenType.FROM): 1784 from_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 1785 self._match_text_seq("TO") 1786 to_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 1787 elif self._match_text_seq("WITH", "(", "MODULUS"): 1788 this = self._parse_number() 1789 self._match_text_seq(",", "REMAINDER") 1790 expression = self._parse_number() 1791 self._match_r_paren() 1792 else: 1793 self.raise_error("Failed to parse partition bound spec.") 1794 1795 return self.expression( 1796 exp.PartitionBoundSpec, 1797 this=this, 1798 expression=expression, 1799 from_expressions=from_expressions, 1800 to_expressions=to_expressions, 1801 ) 1802 1803 # https://www.postgresql.org/docs/current/sql-createtable.html 1804 def 
_parse_partitioned_of(self) -> t.Optional[exp.PartitionedOfProperty]: 1805 if not self._match_text_seq("OF"): 1806 self._retreat(self._index - 1) 1807 return None 1808 1809 this = self._parse_table(schema=True) 1810 1811 if self._match(TokenType.DEFAULT): 1812 expression: exp.Var | exp.PartitionBoundSpec = exp.var("DEFAULT") 1813 elif self._match_text_seq("FOR", "VALUES"): 1814 expression = self._parse_partition_bound_spec() 1815 else: 1816 self.raise_error("Expecting either DEFAULT or FOR VALUES clause.") 1817 1818 return self.expression(exp.PartitionedOfProperty, this=this, expression=expression) 1819 1820 def _parse_partitioned_by(self) -> exp.PartitionedByProperty: 1821 self._match(TokenType.EQ) 1822 return self.expression( 1823 exp.PartitionedByProperty, 1824 this=self._parse_schema() or self._parse_bracket(self._parse_field()), 1825 ) 1826 1827 def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty: 1828 if self._match_text_seq("AND", "STATISTICS"): 1829 statistics = True 1830 elif self._match_text_seq("AND", "NO", "STATISTICS"): 1831 statistics = False 1832 else: 1833 statistics = None 1834 1835 return self.expression(exp.WithDataProperty, no=no, statistics=statistics) 1836 1837 def _parse_no_property(self) -> t.Optional[exp.NoPrimaryIndexProperty]: 1838 if self._match_text_seq("PRIMARY", "INDEX"): 1839 return exp.NoPrimaryIndexProperty() 1840 return None 1841 1842 def _parse_on_property(self) -> t.Optional[exp.Expression]: 1843 if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"): 1844 return exp.OnCommitProperty() 1845 if self._match_text_seq("COMMIT", "DELETE", "ROWS"): 1846 return exp.OnCommitProperty(delete=True) 1847 return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var())) 1848 1849 def _parse_distkey(self) -> exp.DistKeyProperty: 1850 return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var)) 1851 1852 def _parse_create_like(self) -> t.Optional[exp.LikeProperty]: 1853 table = 
self._parse_table(schema=True) 1854 1855 options = [] 1856 while self._match_texts(("INCLUDING", "EXCLUDING")): 1857 this = self._prev.text.upper() 1858 1859 id_var = self._parse_id_var() 1860 if not id_var: 1861 return None 1862 1863 options.append( 1864 self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper())) 1865 ) 1866 1867 return self.expression(exp.LikeProperty, this=table, expressions=options) 1868 1869 def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty: 1870 return self.expression( 1871 exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound 1872 ) 1873 1874 def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty: 1875 self._match(TokenType.EQ) 1876 return self.expression( 1877 exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default 1878 ) 1879 1880 def _parse_remote_with_connection(self) -> exp.RemoteWithConnectionModelProperty: 1881 self._match_text_seq("WITH", "CONNECTION") 1882 return self.expression( 1883 exp.RemoteWithConnectionModelProperty, this=self._parse_table_parts() 1884 ) 1885 1886 def _parse_returns(self) -> exp.ReturnsProperty: 1887 value: t.Optional[exp.Expression] 1888 is_table = self._match(TokenType.TABLE) 1889 1890 if is_table: 1891 if self._match(TokenType.LT): 1892 value = self.expression( 1893 exp.Schema, 1894 this="TABLE", 1895 expressions=self._parse_csv(self._parse_struct_types), 1896 ) 1897 if not self._match(TokenType.GT): 1898 self.raise_error("Expecting >") 1899 else: 1900 value = self._parse_schema(exp.var("TABLE")) 1901 else: 1902 value = self._parse_types() 1903 1904 return self.expression(exp.ReturnsProperty, this=value, is_table=is_table) 1905 1906 def _parse_describe(self) -> exp.Describe: 1907 kind = self._match_set(self.CREATABLES) and self._prev.text 1908 this = self._parse_table(schema=True) 1909 properties = self._parse_properties() 1910 expressions = properties.expressions if properties else None 1911 return 
self.expression(exp.Describe, this=this, kind=kind, expressions=expressions) 1912 1913 def _parse_insert(self) -> exp.Insert: 1914 comments = ensure_list(self._prev_comments) 1915 overwrite = self._match(TokenType.OVERWRITE) 1916 ignore = self._match(TokenType.IGNORE) 1917 local = self._match_text_seq("LOCAL") 1918 alternative = None 1919 1920 if self._match_text_seq("DIRECTORY"): 1921 this: t.Optional[exp.Expression] = self.expression( 1922 exp.Directory, 1923 this=self._parse_var_or_string(), 1924 local=local, 1925 row_format=self._parse_row_format(match_row=True), 1926 ) 1927 else: 1928 if self._match(TokenType.OR): 1929 alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text 1930 1931 self._match(TokenType.INTO) 1932 comments += ensure_list(self._prev_comments) 1933 self._match(TokenType.TABLE) 1934 this = self._parse_table(schema=True) 1935 1936 returning = self._parse_returning() 1937 1938 return self.expression( 1939 exp.Insert, 1940 comments=comments, 1941 this=this, 1942 by_name=self._match_text_seq("BY", "NAME"), 1943 exists=self._parse_exists(), 1944 partition=self._parse_partition(), 1945 where=self._match_pair(TokenType.REPLACE, TokenType.WHERE) 1946 and self._parse_conjunction(), 1947 expression=self._parse_ddl_select(), 1948 conflict=self._parse_on_conflict(), 1949 returning=returning or self._parse_returning(), 1950 overwrite=overwrite, 1951 alternative=alternative, 1952 ignore=ignore, 1953 ) 1954 1955 def _parse_kill(self) -> exp.Kill: 1956 kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None 1957 1958 return self.expression( 1959 exp.Kill, 1960 this=self._parse_primary(), 1961 kind=kind, 1962 ) 1963 1964 def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]: 1965 conflict = self._match_text_seq("ON", "CONFLICT") 1966 duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY") 1967 1968 if not conflict and not duplicate: 1969 return None 1970 1971 nothing = None 1972 expressions = None 
1973 key = None 1974 constraint = None 1975 1976 if conflict: 1977 if self._match_text_seq("ON", "CONSTRAINT"): 1978 constraint = self._parse_id_var() 1979 else: 1980 key = self._parse_csv(self._parse_value) 1981 1982 self._match_text_seq("DO") 1983 if self._match_text_seq("NOTHING"): 1984 nothing = True 1985 else: 1986 self._match(TokenType.UPDATE) 1987 self._match(TokenType.SET) 1988 expressions = self._parse_csv(self._parse_equality) 1989 1990 return self.expression( 1991 exp.OnConflict, 1992 duplicate=duplicate, 1993 expressions=expressions, 1994 nothing=nothing, 1995 key=key, 1996 constraint=constraint, 1997 ) 1998 1999 def _parse_returning(self) -> t.Optional[exp.Returning]: 2000 if not self._match(TokenType.RETURNING): 2001 return None 2002 return self.expression( 2003 exp.Returning, 2004 expressions=self._parse_csv(self._parse_expression), 2005 into=self._match(TokenType.INTO) and self._parse_table_part(), 2006 ) 2007 2008 def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 2009 if not self._match(TokenType.FORMAT): 2010 return None 2011 return self._parse_row_format() 2012 2013 def _parse_row_format( 2014 self, match_row: bool = False 2015 ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 2016 if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT): 2017 return None 2018 2019 if self._match_text_seq("SERDE"): 2020 this = self._parse_string() 2021 2022 serde_properties = None 2023 if self._match(TokenType.SERDE_PROPERTIES): 2024 serde_properties = self.expression( 2025 exp.SerdeProperties, expressions=self._parse_wrapped_csv(self._parse_property) 2026 ) 2027 2028 return self.expression( 2029 exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties 2030 ) 2031 2032 self._match_text_seq("DELIMITED") 2033 2034 kwargs = {} 2035 2036 if self._match_text_seq("FIELDS", "TERMINATED", "BY"): 2037 kwargs["fields"] = self._parse_string() 2038 if 
self._match_text_seq("ESCAPED", "BY"): 2039 kwargs["escaped"] = self._parse_string() 2040 if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"): 2041 kwargs["collection_items"] = self._parse_string() 2042 if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"): 2043 kwargs["map_keys"] = self._parse_string() 2044 if self._match_text_seq("LINES", "TERMINATED", "BY"): 2045 kwargs["lines"] = self._parse_string() 2046 if self._match_text_seq("NULL", "DEFINED", "AS"): 2047 kwargs["null"] = self._parse_string() 2048 2049 return self.expression(exp.RowFormatDelimitedProperty, **kwargs) # type: ignore 2050 2051 def _parse_load(self) -> exp.LoadData | exp.Command: 2052 if self._match_text_seq("DATA"): 2053 local = self._match_text_seq("LOCAL") 2054 self._match_text_seq("INPATH") 2055 inpath = self._parse_string() 2056 overwrite = self._match(TokenType.OVERWRITE) 2057 self._match_pair(TokenType.INTO, TokenType.TABLE) 2058 2059 return self.expression( 2060 exp.LoadData, 2061 this=self._parse_table(schema=True), 2062 local=local, 2063 overwrite=overwrite, 2064 inpath=inpath, 2065 partition=self._parse_partition(), 2066 input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(), 2067 serde=self._match_text_seq("SERDE") and self._parse_string(), 2068 ) 2069 return self._parse_as_command(self._prev) 2070 2071 def _parse_delete(self) -> exp.Delete: 2072 # This handles MySQL's "Multiple-Table Syntax" 2073 # https://dev.mysql.com/doc/refman/8.0/en/delete.html 2074 tables = None 2075 comments = self._prev_comments 2076 if not self._match(TokenType.FROM, advance=False): 2077 tables = self._parse_csv(self._parse_table) or None 2078 2079 returning = self._parse_returning() 2080 2081 return self.expression( 2082 exp.Delete, 2083 comments=comments, 2084 tables=tables, 2085 this=self._match(TokenType.FROM) and self._parse_table(joins=True), 2086 using=self._match(TokenType.USING) and self._parse_table(joins=True), 2087 where=self._parse_where(), 2088 
            returning=returning or self._parse_returning(),
            limit=self._parse_limit(),
        )

    def _parse_update(self) -> exp.Update:
        """Parse UPDATE <table> SET <assignments> [FROM ...] [WHERE ...]
        [RETURNING ...] [ORDER BY ...] [LIMIT ...]."""
        comments = self._prev_comments
        this = self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS)
        expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality)
        # RETURNING may appear before FROM/WHERE or at the very end; try the early
        # position here and the late position below, keeping whichever matched.
        returning = self._parse_returning()
        return self.expression(
            exp.Update,
            comments=comments,
            # "from" is a Python keyword, hence the dict expansion instead of kwargs.
            **{  # type: ignore
                "this": this,
                "expressions": expressions,
                "from": self._parse_from(joins=True),
                "where": self._parse_where(),
                "returning": returning or self._parse_returning(),
                "order": self._parse_order(),
                "limit": self._parse_limit(),
            },
        )

    def _parse_uncache(self) -> exp.Uncache:
        """Parse UNCACHE TABLE [IF EXISTS] <table>."""
        if not self._match(TokenType.TABLE):
            self.raise_error("Expecting TABLE after UNCACHE")

        return self.expression(
            exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True)
        )

    def _parse_cache(self) -> exp.Cache:
        """Parse CACHE [LAZY] TABLE <table> [OPTIONS(<k> = <v>)] [AS <select>]."""
        lazy = self._match_text_seq("LAZY")
        self._match(TokenType.TABLE)
        table = self._parse_table(schema=True)

        options = []
        if self._match_text_seq("OPTIONS"):
            # Only a single (key = value) string pair is consumed here.
            self._match_l_paren()
            k = self._parse_string()
            self._match(TokenType.EQ)
            v = self._parse_string()
            options = [k, v]
            self._match_r_paren()

        self._match(TokenType.ALIAS)
        return self.expression(
            exp.Cache,
            this=table,
            lazy=lazy,
            options=options,
            expression=self._parse_select(nested=True),
        )

    def _parse_partition(self) -> t.Optional[exp.Partition]:
        # PARTITION (<conjunction>, ...); None when the keyword is absent.
        if not self._match(TokenType.PARTITION):
            return None

        return self.expression(
            exp.Partition, expressions=self._parse_wrapped_csv(self._parse_conjunction)
        )

    def _parse_value(self) -> exp.Tuple:
        # A VALUES row: a parenthesized tuple, or (Presto-style) one bare expression.
        if self._match(TokenType.L_PAREN):
            expressions =
self._parse_csv(self._parse_conjunction) 2153 self._match_r_paren() 2154 return self.expression(exp.Tuple, expressions=expressions) 2155 2156 # In presto we can have VALUES 1, 2 which results in 1 column & 2 rows. 2157 # https://prestodb.io/docs/current/sql/values.html 2158 return self.expression(exp.Tuple, expressions=[self._parse_conjunction()]) 2159 2160 def _parse_projections(self) -> t.List[exp.Expression]: 2161 return self._parse_expressions() 2162 2163 def _parse_select( 2164 self, nested: bool = False, table: bool = False, parse_subquery_alias: bool = True 2165 ) -> t.Optional[exp.Expression]: 2166 cte = self._parse_with() 2167 2168 if cte: 2169 this = self._parse_statement() 2170 2171 if not this: 2172 self.raise_error("Failed to parse any statement following CTE") 2173 return cte 2174 2175 if "with" in this.arg_types: 2176 this.set("with", cte) 2177 else: 2178 self.raise_error(f"{this.key} does not support CTE") 2179 this = cte 2180 2181 return this 2182 2183 # duckdb supports leading with FROM x 2184 from_ = self._parse_from() if self._match(TokenType.FROM, advance=False) else None 2185 2186 if self._match(TokenType.SELECT): 2187 comments = self._prev_comments 2188 2189 hint = self._parse_hint() 2190 all_ = self._match(TokenType.ALL) 2191 distinct = self._match_set(self.DISTINCT_TOKENS) 2192 2193 kind = ( 2194 self._match(TokenType.ALIAS) 2195 and self._match_texts(("STRUCT", "VALUE")) 2196 and self._prev.text 2197 ) 2198 2199 if distinct: 2200 distinct = self.expression( 2201 exp.Distinct, 2202 on=self._parse_value() if self._match(TokenType.ON) else None, 2203 ) 2204 2205 if all_ and distinct: 2206 self.raise_error("Cannot specify both ALL and DISTINCT after SELECT") 2207 2208 limit = self._parse_limit(top=True) 2209 projections = self._parse_projections() 2210 2211 this = self.expression( 2212 exp.Select, 2213 kind=kind, 2214 hint=hint, 2215 distinct=distinct, 2216 expressions=projections, 2217 limit=limit, 2218 ) 2219 this.comments = comments 2220 
2221 into = self._parse_into() 2222 if into: 2223 this.set("into", into) 2224 2225 if not from_: 2226 from_ = self._parse_from() 2227 2228 if from_: 2229 this.set("from", from_) 2230 2231 this = self._parse_query_modifiers(this) 2232 elif (table or nested) and self._match(TokenType.L_PAREN): 2233 if self._match(TokenType.PIVOT): 2234 this = self._parse_simplified_pivot() 2235 elif self._match(TokenType.FROM): 2236 this = exp.select("*").from_( 2237 t.cast(exp.From, self._parse_from(skip_from_token=True)) 2238 ) 2239 else: 2240 this = self._parse_table() if table else self._parse_select(nested=True) 2241 this = self._parse_set_operations(self._parse_query_modifiers(this)) 2242 2243 self._match_r_paren() 2244 2245 # We return early here so that the UNION isn't attached to the subquery by the 2246 # following call to _parse_set_operations, but instead becomes the parent node 2247 return self._parse_subquery(this, parse_alias=parse_subquery_alias) 2248 elif self._match(TokenType.VALUES): 2249 this = self.expression( 2250 exp.Values, 2251 expressions=self._parse_csv(self._parse_value), 2252 alias=self._parse_table_alias(), 2253 ) 2254 elif from_: 2255 this = exp.select("*").from_(from_.this, copy=False) 2256 else: 2257 this = None 2258 2259 return self._parse_set_operations(this) 2260 2261 def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]: 2262 if not skip_with_token and not self._match(TokenType.WITH): 2263 return None 2264 2265 comments = self._prev_comments 2266 recursive = self._match(TokenType.RECURSIVE) 2267 2268 expressions = [] 2269 while True: 2270 expressions.append(self._parse_cte()) 2271 2272 if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH): 2273 break 2274 else: 2275 self._match(TokenType.WITH) 2276 2277 return self.expression( 2278 exp.With, comments=comments, expressions=expressions, recursive=recursive 2279 ) 2280 2281 def _parse_cte(self) -> exp.CTE: 2282 alias = self._parse_table_alias() 2283 if not 
alias or not alias.this: 2284 self.raise_error("Expected CTE to have alias") 2285 2286 self._match(TokenType.ALIAS) 2287 return self.expression( 2288 exp.CTE, this=self._parse_wrapped(self._parse_statement), alias=alias 2289 ) 2290 2291 def _parse_table_alias( 2292 self, alias_tokens: t.Optional[t.Collection[TokenType]] = None 2293 ) -> t.Optional[exp.TableAlias]: 2294 any_token = self._match(TokenType.ALIAS) 2295 alias = ( 2296 self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 2297 or self._parse_string_as_identifier() 2298 ) 2299 2300 index = self._index 2301 if self._match(TokenType.L_PAREN): 2302 columns = self._parse_csv(self._parse_function_parameter) 2303 self._match_r_paren() if columns else self._retreat(index) 2304 else: 2305 columns = None 2306 2307 if not alias and not columns: 2308 return None 2309 2310 return self.expression(exp.TableAlias, this=alias, columns=columns) 2311 2312 def _parse_subquery( 2313 self, this: t.Optional[exp.Expression], parse_alias: bool = True 2314 ) -> t.Optional[exp.Subquery]: 2315 if not this: 2316 return None 2317 2318 return self.expression( 2319 exp.Subquery, 2320 this=this, 2321 pivots=self._parse_pivots(), 2322 alias=self._parse_table_alias() if parse_alias else None, 2323 ) 2324 2325 def _parse_query_modifiers( 2326 self, this: t.Optional[exp.Expression] 2327 ) -> t.Optional[exp.Expression]: 2328 if isinstance(this, self.MODIFIABLES): 2329 for join in iter(self._parse_join, None): 2330 this.append("joins", join) 2331 for lateral in iter(self._parse_lateral, None): 2332 this.append("laterals", lateral) 2333 2334 while True: 2335 if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False): 2336 parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type] 2337 key, expression = parser(self) 2338 2339 if expression: 2340 this.set(key, expression) 2341 if key == "limit": 2342 offset = expression.args.pop("offset", None) 2343 if offset: 2344 this.set("offset", 
exp.Offset(expression=offset)) 2345 continue 2346 break 2347 return this 2348 2349 def _parse_hint(self) -> t.Optional[exp.Hint]: 2350 if self._match(TokenType.HINT): 2351 hints = [] 2352 for hint in iter(lambda: self._parse_csv(self._parse_function), []): 2353 hints.extend(hint) 2354 2355 if not self._match_pair(TokenType.STAR, TokenType.SLASH): 2356 self.raise_error("Expected */ after HINT") 2357 2358 return self.expression(exp.Hint, expressions=hints) 2359 2360 return None 2361 2362 def _parse_into(self) -> t.Optional[exp.Into]: 2363 if not self._match(TokenType.INTO): 2364 return None 2365 2366 temp = self._match(TokenType.TEMPORARY) 2367 unlogged = self._match_text_seq("UNLOGGED") 2368 self._match(TokenType.TABLE) 2369 2370 return self.expression( 2371 exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged 2372 ) 2373 2374 def _parse_from( 2375 self, joins: bool = False, skip_from_token: bool = False 2376 ) -> t.Optional[exp.From]: 2377 if not skip_from_token and not self._match(TokenType.FROM): 2378 return None 2379 2380 return self.expression( 2381 exp.From, comments=self._prev_comments, this=self._parse_table(joins=joins) 2382 ) 2383 2384 def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]: 2385 if not self._match(TokenType.MATCH_RECOGNIZE): 2386 return None 2387 2388 self._match_l_paren() 2389 2390 partition = self._parse_partition_by() 2391 order = self._parse_order() 2392 measures = self._parse_expressions() if self._match_text_seq("MEASURES") else None 2393 2394 if self._match_text_seq("ONE", "ROW", "PER", "MATCH"): 2395 rows = exp.var("ONE ROW PER MATCH") 2396 elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"): 2397 text = "ALL ROWS PER MATCH" 2398 if self._match_text_seq("SHOW", "EMPTY", "MATCHES"): 2399 text += f" SHOW EMPTY MATCHES" 2400 elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"): 2401 text += f" OMIT EMPTY MATCHES" 2402 elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"): 2403 text += f" 
WITH UNMATCHED ROWS" 2404 rows = exp.var(text) 2405 else: 2406 rows = None 2407 2408 if self._match_text_seq("AFTER", "MATCH", "SKIP"): 2409 text = "AFTER MATCH SKIP" 2410 if self._match_text_seq("PAST", "LAST", "ROW"): 2411 text += f" PAST LAST ROW" 2412 elif self._match_text_seq("TO", "NEXT", "ROW"): 2413 text += f" TO NEXT ROW" 2414 elif self._match_text_seq("TO", "FIRST"): 2415 text += f" TO FIRST {self._advance_any().text}" # type: ignore 2416 elif self._match_text_seq("TO", "LAST"): 2417 text += f" TO LAST {self._advance_any().text}" # type: ignore 2418 after = exp.var(text) 2419 else: 2420 after = None 2421 2422 if self._match_text_seq("PATTERN"): 2423 self._match_l_paren() 2424 2425 if not self._curr: 2426 self.raise_error("Expecting )", self._curr) 2427 2428 paren = 1 2429 start = self._curr 2430 2431 while self._curr and paren > 0: 2432 if self._curr.token_type == TokenType.L_PAREN: 2433 paren += 1 2434 if self._curr.token_type == TokenType.R_PAREN: 2435 paren -= 1 2436 2437 end = self._prev 2438 self._advance() 2439 2440 if paren > 0: 2441 self.raise_error("Expecting )", self._curr) 2442 2443 pattern = exp.var(self._find_sql(start, end)) 2444 else: 2445 pattern = None 2446 2447 define = ( 2448 self._parse_csv( 2449 lambda: self.expression( 2450 exp.Alias, 2451 alias=self._parse_id_var(any_token=True), 2452 this=self._match(TokenType.ALIAS) and self._parse_conjunction(), 2453 ) 2454 ) 2455 if self._match_text_seq("DEFINE") 2456 else None 2457 ) 2458 2459 self._match_r_paren() 2460 2461 return self.expression( 2462 exp.MatchRecognize, 2463 partition_by=partition, 2464 order=order, 2465 measures=measures, 2466 rows=rows, 2467 after=after, 2468 pattern=pattern, 2469 define=define, 2470 alias=self._parse_table_alias(), 2471 ) 2472 2473 def _parse_lateral(self) -> t.Optional[exp.Lateral]: 2474 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY) 2475 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY) 2476 2477 if outer_apply or 
cross_apply: 2478 this = self._parse_select(table=True) 2479 view = None 2480 outer = not cross_apply 2481 elif self._match(TokenType.LATERAL): 2482 this = self._parse_select(table=True) 2483 view = self._match(TokenType.VIEW) 2484 outer = self._match(TokenType.OUTER) 2485 else: 2486 return None 2487 2488 if not this: 2489 this = ( 2490 self._parse_unnest() 2491 or self._parse_function() 2492 or self._parse_id_var(any_token=False) 2493 ) 2494 2495 while self._match(TokenType.DOT): 2496 this = exp.Dot( 2497 this=this, 2498 expression=self._parse_function() or self._parse_id_var(any_token=False), 2499 ) 2500 2501 if view: 2502 table = self._parse_id_var(any_token=False) 2503 columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else [] 2504 table_alias: t.Optional[exp.TableAlias] = self.expression( 2505 exp.TableAlias, this=table, columns=columns 2506 ) 2507 elif isinstance(this, (exp.Subquery, exp.Unnest)) and this.alias: 2508 # We move the alias from the lateral's child node to the lateral itself 2509 table_alias = this.args["alias"].pop() 2510 else: 2511 table_alias = self._parse_table_alias() 2512 2513 return self.expression(exp.Lateral, this=this, view=view, outer=outer, alias=table_alias) 2514 2515 def _parse_join_parts( 2516 self, 2517 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 2518 return ( 2519 self._match_set(self.JOIN_METHODS) and self._prev, 2520 self._match_set(self.JOIN_SIDES) and self._prev, 2521 self._match_set(self.JOIN_KINDS) and self._prev, 2522 ) 2523 2524 def _parse_join( 2525 self, skip_join_token: bool = False, parse_bracket: bool = False 2526 ) -> t.Optional[exp.Join]: 2527 if self._match(TokenType.COMMA): 2528 return self.expression(exp.Join, this=self._parse_table()) 2529 2530 index = self._index 2531 method, side, kind = self._parse_join_parts() 2532 hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None 2533 join = self._match(TokenType.JOIN) 2534 2535 if not 
skip_join_token and not join:
            # No JOIN keyword followed the method/side/kind tokens: rewind and
            # forget them — they were not part of a join after all.
            self._retreat(index)
            kind = None
            method = None
            side = None

        outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False)
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False)

        if not skip_join_token and not join and not outer_apply and not cross_apply:
            return None

        if outer_apply:
            # OUTER APPLY is represented as a LEFT-side join.
            side = Token(TokenType.LEFT, "LEFT")

        kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)}

        if method:
            kwargs["method"] = method.text
        if side:
            kwargs["side"] = side.text
        if kind:
            kwargs["kind"] = kind.text
        if hint:
            kwargs["hint"] = hint

        if self._match(TokenType.ON):
            kwargs["on"] = self._parse_conjunction()
        elif self._match(TokenType.USING):
            kwargs["using"] = self._parse_wrapped_id_vars()
        elif not (kind and kind.token_type == TokenType.CROSS):
            # Handle nested joins (e.g. `a JOIN b JOIN c ON ...`) by speculatively
            # parsing another join and attaching it to the right-hand table.
            index = self._index
            join = self._parse_join()

            if join and self._match(TokenType.ON):
                kwargs["on"] = self._parse_conjunction()
            elif join and self._match(TokenType.USING):
                kwargs["using"] = self._parse_wrapped_id_vars()
            else:
                # Neither ON nor USING followed: undo the speculative parse.
                join = None
                self._retreat(index)

            kwargs["this"].set("joins", [join] if join else None)

        # Carry over comments attached to the join keyword tokens themselves.
        comments = [c for token in (method, side, kind) if token for c in token.comments]
        return self.expression(exp.Join, comments=comments, **kwargs)

    def _parse_opclass(self) -> t.Optional[exp.Expression]:
        # <expr> [<opclass>] as used in index column definitions; the lookaheads
        # decide whether the next identifier is an operator class or a keyword.
        this = self._parse_conjunction()
        if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False):
            return this

        if not self._match_set(self.OPTYPE_FOLLOW_TOKENS, advance=False):
            return self.expression(exp.Opclass, this=this, expression=self._parse_table_parts())

        return this

    def _parse_index(
        self,
        index: t.Optional[exp.Expression] = None,
    ) -> t.Optional[exp.Index]:
2596 if index: 2597 unique = None 2598 primary = None 2599 amp = None 2600 2601 self._match(TokenType.ON) 2602 self._match(TokenType.TABLE) # hive 2603 table = self._parse_table_parts(schema=True) 2604 else: 2605 unique = self._match(TokenType.UNIQUE) 2606 primary = self._match_text_seq("PRIMARY") 2607 amp = self._match_text_seq("AMP") 2608 2609 if not self._match(TokenType.INDEX): 2610 return None 2611 2612 index = self._parse_id_var() 2613 table = None 2614 2615 using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None 2616 2617 if self._match(TokenType.L_PAREN, advance=False): 2618 columns = self._parse_wrapped_csv(lambda: self._parse_ordered(self._parse_opclass)) 2619 else: 2620 columns = None 2621 2622 return self.expression( 2623 exp.Index, 2624 this=index, 2625 table=table, 2626 using=using, 2627 columns=columns, 2628 unique=unique, 2629 primary=primary, 2630 amp=amp, 2631 partition_by=self._parse_partition_by(), 2632 where=self._parse_where(), 2633 ) 2634 2635 def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]: 2636 hints: t.List[exp.Expression] = [] 2637 if self._match_pair(TokenType.WITH, TokenType.L_PAREN): 2638 # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16 2639 hints.append( 2640 self.expression( 2641 exp.WithTableHint, 2642 expressions=self._parse_csv( 2643 lambda: self._parse_function() or self._parse_var(any_token=True) 2644 ), 2645 ) 2646 ) 2647 self._match_r_paren() 2648 else: 2649 # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html 2650 while self._match_set(self.TABLE_INDEX_HINT_TOKENS): 2651 hint = exp.IndexTableHint(this=self._prev.text.upper()) 2652 2653 self._match_texts({"INDEX", "KEY"}) 2654 if self._match(TokenType.FOR): 2655 hint.set("target", self._advance_any() and self._prev.text.upper()) 2656 2657 hint.set("expressions", self._parse_wrapped_id_vars()) 2658 hints.append(hint) 2659 2660 return hints or None 2661 2662 def 
_parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]: 2663 return ( 2664 (not schema and self._parse_function(optional_parens=False)) 2665 or self._parse_id_var(any_token=False) 2666 or self._parse_string_as_identifier() 2667 or self._parse_placeholder() 2668 ) 2669 2670 def _parse_table_parts(self, schema: bool = False) -> exp.Table: 2671 catalog = None 2672 db = None 2673 table = self._parse_table_part(schema=schema) 2674 2675 while self._match(TokenType.DOT): 2676 if catalog: 2677 # This allows nesting the table in arbitrarily many dot expressions if needed 2678 table = self.expression( 2679 exp.Dot, this=table, expression=self._parse_table_part(schema=schema) 2680 ) 2681 else: 2682 catalog = db 2683 db = table 2684 table = self._parse_table_part(schema=schema) 2685 2686 if not table: 2687 self.raise_error(f"Expected table name but got {self._curr}") 2688 2689 return self.expression( 2690 exp.Table, this=table, db=db, catalog=catalog, pivots=self._parse_pivots() 2691 ) 2692 2693 def _parse_table( 2694 self, 2695 schema: bool = False, 2696 joins: bool = False, 2697 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 2698 parse_bracket: bool = False, 2699 ) -> t.Optional[exp.Expression]: 2700 lateral = self._parse_lateral() 2701 if lateral: 2702 return lateral 2703 2704 unnest = self._parse_unnest() 2705 if unnest: 2706 return unnest 2707 2708 values = self._parse_derived_table_values() 2709 if values: 2710 return values 2711 2712 subquery = self._parse_select(table=True) 2713 if subquery: 2714 if not subquery.args.get("pivots"): 2715 subquery.set("pivots", self._parse_pivots()) 2716 return subquery 2717 2718 bracket = parse_bracket and self._parse_bracket(None) 2719 bracket = self.expression(exp.Table, this=bracket) if bracket else None 2720 this = t.cast( 2721 exp.Expression, bracket or self._parse_bracket(self._parse_table_parts(schema=schema)) 2722 ) 2723 2724 if schema: 2725 return self._parse_schema(this=this) 2726 2727 version = 
self._parse_version() 2728 2729 if version: 2730 this.set("version", version) 2731 2732 if self.ALIAS_POST_TABLESAMPLE: 2733 table_sample = self._parse_table_sample() 2734 2735 alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 2736 if alias: 2737 this.set("alias", alias) 2738 2739 if self._match_text_seq("AT"): 2740 this.set("index", self._parse_id_var()) 2741 2742 this.set("hints", self._parse_table_hints()) 2743 2744 if not this.args.get("pivots"): 2745 this.set("pivots", self._parse_pivots()) 2746 2747 if not self.ALIAS_POST_TABLESAMPLE: 2748 table_sample = self._parse_table_sample() 2749 2750 if table_sample: 2751 table_sample.set("this", this) 2752 this = table_sample 2753 2754 if joins: 2755 for join in iter(self._parse_join, None): 2756 this.append("joins", join) 2757 2758 if self._match_pair(TokenType.WITH, TokenType.ORDINALITY): 2759 this.set("ordinality", True) 2760 this.set("alias", self._parse_table_alias()) 2761 2762 return this 2763 2764 def _parse_version(self) -> t.Optional[exp.Version]: 2765 if self._match(TokenType.TIMESTAMP_SNAPSHOT): 2766 this = "TIMESTAMP" 2767 elif self._match(TokenType.VERSION_SNAPSHOT): 2768 this = "VERSION" 2769 else: 2770 return None 2771 2772 if self._match_set((TokenType.FROM, TokenType.BETWEEN)): 2773 kind = self._prev.text.upper() 2774 start = self._parse_bitwise() 2775 self._match_texts(("TO", "AND")) 2776 end = self._parse_bitwise() 2777 expression: t.Optional[exp.Expression] = self.expression( 2778 exp.Tuple, expressions=[start, end] 2779 ) 2780 elif self._match_text_seq("CONTAINED", "IN"): 2781 kind = "CONTAINED IN" 2782 expression = self.expression( 2783 exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise) 2784 ) 2785 elif self._match(TokenType.ALL): 2786 kind = "ALL" 2787 expression = None 2788 else: 2789 self._match_text_seq("AS", "OF") 2790 kind = "AS OF" 2791 expression = self._parse_type() 2792 2793 return self.expression(exp.Version, this=this, 
expression=expression, kind=kind) 2794 2795 def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]: 2796 if not self._match(TokenType.UNNEST): 2797 return None 2798 2799 expressions = self._parse_wrapped_csv(self._parse_type) 2800 offset = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 2801 2802 alias = self._parse_table_alias() if with_alias else None 2803 2804 if alias: 2805 if self.UNNEST_COLUMN_ONLY: 2806 if alias.args.get("columns"): 2807 self.raise_error("Unexpected extra column alias in unnest.") 2808 2809 alias.set("columns", [alias.this]) 2810 alias.set("this", None) 2811 2812 columns = alias.args.get("columns") or [] 2813 if offset and len(expressions) < len(columns): 2814 offset = columns.pop() 2815 2816 if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET): 2817 self._match(TokenType.ALIAS) 2818 offset = self._parse_id_var( 2819 any_token=False, tokens=self.UNNEST_OFFSET_ALIAS_TOKENS 2820 ) or exp.to_identifier("offset") 2821 2822 return self.expression(exp.Unnest, expressions=expressions, alias=alias, offset=offset) 2823 2824 def _parse_derived_table_values(self) -> t.Optional[exp.Values]: 2825 is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES) 2826 if not is_derived and not self._match(TokenType.VALUES): 2827 return None 2828 2829 expressions = self._parse_csv(self._parse_value) 2830 alias = self._parse_table_alias() 2831 2832 if is_derived: 2833 self._match_r_paren() 2834 2835 return self.expression( 2836 exp.Values, expressions=expressions, alias=alias or self._parse_table_alias() 2837 ) 2838 2839 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 2840 if not self._match(TokenType.TABLE_SAMPLE) and not ( 2841 as_modifier and self._match_text_seq("USING", "SAMPLE") 2842 ): 2843 return None 2844 2845 bucket_numerator = None 2846 bucket_denominator = None 2847 bucket_field = None 2848 percent = None 2849 rows = None 2850 size = None 2851 seed = None 2852 
2853 kind = ( 2854 self._prev.text if self._prev.token_type == TokenType.TABLE_SAMPLE else "USING SAMPLE" 2855 ) 2856 method = self._parse_var(tokens=(TokenType.ROW,)) 2857 2858 matched_l_paren = self._match(TokenType.L_PAREN) 2859 2860 if self.TABLESAMPLE_CSV: 2861 num = None 2862 expressions = self._parse_csv(self._parse_primary) 2863 else: 2864 expressions = None 2865 num = ( 2866 self._parse_factor() 2867 if self._match(TokenType.NUMBER, advance=False) 2868 else self._parse_primary() 2869 ) 2870 2871 if self._match_text_seq("BUCKET"): 2872 bucket_numerator = self._parse_number() 2873 self._match_text_seq("OUT", "OF") 2874 bucket_denominator = bucket_denominator = self._parse_number() 2875 self._match(TokenType.ON) 2876 bucket_field = self._parse_field() 2877 elif self._match_set((TokenType.PERCENT, TokenType.MOD)): 2878 percent = num 2879 elif self._match(TokenType.ROWS): 2880 rows = num 2881 elif num: 2882 size = num 2883 2884 if matched_l_paren: 2885 self._match_r_paren() 2886 2887 if self._match(TokenType.L_PAREN): 2888 method = self._parse_var() 2889 seed = self._match(TokenType.COMMA) and self._parse_number() 2890 self._match_r_paren() 2891 elif self._match_texts(("SEED", "REPEATABLE")): 2892 seed = self._parse_wrapped(self._parse_number) 2893 2894 return self.expression( 2895 exp.TableSample, 2896 expressions=expressions, 2897 method=method, 2898 bucket_numerator=bucket_numerator, 2899 bucket_denominator=bucket_denominator, 2900 bucket_field=bucket_field, 2901 percent=percent, 2902 rows=rows, 2903 size=size, 2904 seed=seed, 2905 kind=kind, 2906 ) 2907 2908 def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]: 2909 return list(iter(self._parse_pivot, None)) or None 2910 2911 def _parse_joins(self) -> t.Optional[t.List[exp.Join]]: 2912 return list(iter(self._parse_join, None)) or None 2913 2914 # https://duckdb.org/docs/sql/statements/pivot 2915 def _parse_simplified_pivot(self) -> exp.Pivot: 2916 def _parse_on() -> t.Optional[exp.Expression]: 2917 this 
= self._parse_bitwise() 2918 return self._parse_in(this) if self._match(TokenType.IN) else this 2919 2920 this = self._parse_table() 2921 expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on) 2922 using = self._match(TokenType.USING) and self._parse_csv( 2923 lambda: self._parse_alias(self._parse_function()) 2924 ) 2925 group = self._parse_group() 2926 return self.expression( 2927 exp.Pivot, this=this, expressions=expressions, using=using, group=group 2928 ) 2929 2930 def _parse_pivot(self) -> t.Optional[exp.Pivot]: 2931 index = self._index 2932 include_nulls = None 2933 2934 if self._match(TokenType.PIVOT): 2935 unpivot = False 2936 elif self._match(TokenType.UNPIVOT): 2937 unpivot = True 2938 2939 # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax 2940 if self._match_text_seq("INCLUDE", "NULLS"): 2941 include_nulls = True 2942 elif self._match_text_seq("EXCLUDE", "NULLS"): 2943 include_nulls = False 2944 else: 2945 return None 2946 2947 expressions = [] 2948 field = None 2949 2950 if not self._match(TokenType.L_PAREN): 2951 self._retreat(index) 2952 return None 2953 2954 if unpivot: 2955 expressions = self._parse_csv(self._parse_column) 2956 else: 2957 expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function())) 2958 2959 if not expressions: 2960 self.raise_error("Failed to parse PIVOT's aggregation list") 2961 2962 if not self._match(TokenType.FOR): 2963 self.raise_error("Expecting FOR") 2964 2965 value = self._parse_column() 2966 2967 if not self._match(TokenType.IN): 2968 self.raise_error("Expecting IN") 2969 2970 field = self._parse_in(value, alias=True) 2971 2972 self._match_r_paren() 2973 2974 pivot = self.expression( 2975 exp.Pivot, 2976 expressions=expressions, 2977 field=field, 2978 unpivot=unpivot, 2979 include_nulls=include_nulls, 2980 ) 2981 2982 if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False): 2983 pivot.set("alias", 
self._parse_table_alias()) 2984 2985 if not unpivot: 2986 names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions)) 2987 2988 columns: t.List[exp.Expression] = [] 2989 for fld in pivot.args["field"].expressions: 2990 field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name 2991 for name in names: 2992 if self.PREFIXED_PIVOT_COLUMNS: 2993 name = f"{name}_{field_name}" if name else field_name 2994 else: 2995 name = f"{field_name}_{name}" if name else field_name 2996 2997 columns.append(exp.to_identifier(name)) 2998 2999 pivot.set("columns", columns) 3000 3001 return pivot 3002 3003 def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]: 3004 return [agg.alias for agg in aggregations] 3005 3006 def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]: 3007 if not skip_where_token and not self._match(TokenType.WHERE): 3008 return None 3009 3010 return self.expression( 3011 exp.Where, comments=self._prev_comments, this=self._parse_conjunction() 3012 ) 3013 3014 def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]: 3015 if not skip_group_by_token and not self._match(TokenType.GROUP_BY): 3016 return None 3017 3018 elements = defaultdict(list) 3019 3020 if self._match(TokenType.ALL): 3021 return self.expression(exp.Group, all=True) 3022 3023 while True: 3024 expressions = self._parse_csv(self._parse_conjunction) 3025 if expressions: 3026 elements["expressions"].extend(expressions) 3027 3028 grouping_sets = self._parse_grouping_sets() 3029 if grouping_sets: 3030 elements["grouping_sets"].extend(grouping_sets) 3031 3032 rollup = None 3033 cube = None 3034 totals = None 3035 3036 index = self._index 3037 with_ = self._match(TokenType.WITH) 3038 if self._match(TokenType.ROLLUP): 3039 rollup = with_ or self._parse_wrapped_csv(self._parse_column) 3040 elements["rollup"].extend(ensure_list(rollup)) 3041 3042 if self._match(TokenType.CUBE): 3043 cube = with_ or 
self._parse_wrapped_csv(self._parse_column) 3044 elements["cube"].extend(ensure_list(cube)) 3045 3046 if self._match_text_seq("TOTALS"): 3047 totals = True 3048 elements["totals"] = True # type: ignore 3049 3050 if not (grouping_sets or rollup or cube or totals): 3051 if with_: 3052 self._retreat(index) 3053 break 3054 3055 return self.expression(exp.Group, **elements) # type: ignore 3056 3057 def _parse_grouping_sets(self) -> t.Optional[t.List[exp.Expression]]: 3058 if not self._match(TokenType.GROUPING_SETS): 3059 return None 3060 3061 return self._parse_wrapped_csv(self._parse_grouping_set) 3062 3063 def _parse_grouping_set(self) -> t.Optional[exp.Expression]: 3064 if self._match(TokenType.L_PAREN): 3065 grouping_set = self._parse_csv(self._parse_column) 3066 self._match_r_paren() 3067 return self.expression(exp.Tuple, expressions=grouping_set) 3068 3069 return self._parse_column() 3070 3071 def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]: 3072 if not skip_having_token and not self._match(TokenType.HAVING): 3073 return None 3074 return self.expression(exp.Having, this=self._parse_conjunction()) 3075 3076 def _parse_qualify(self) -> t.Optional[exp.Qualify]: 3077 if not self._match(TokenType.QUALIFY): 3078 return None 3079 return self.expression(exp.Qualify, this=self._parse_conjunction()) 3080 3081 def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]: 3082 if skip_start_token: 3083 start = None 3084 elif self._match(TokenType.START_WITH): 3085 start = self._parse_conjunction() 3086 else: 3087 return None 3088 3089 self._match(TokenType.CONNECT_BY) 3090 self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression( 3091 exp.Prior, this=self._parse_bitwise() 3092 ) 3093 connect = self._parse_conjunction() 3094 self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR") 3095 3096 if not start and self._match(TokenType.START_WITH): 3097 start = self._parse_conjunction() 3098 3099 return 
self.expression(exp.Connect, start=start, connect=connect) 3100 3101 def _parse_order( 3102 self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False 3103 ) -> t.Optional[exp.Expression]: 3104 if not skip_order_token and not self._match(TokenType.ORDER_BY): 3105 return this 3106 3107 return self.expression( 3108 exp.Order, this=this, expressions=self._parse_csv(self._parse_ordered) 3109 ) 3110 3111 def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]: 3112 if not self._match(token): 3113 return None 3114 return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered)) 3115 3116 def _parse_ordered(self, parse_method: t.Optional[t.Callable] = None) -> exp.Ordered: 3117 this = parse_method() if parse_method else self._parse_conjunction() 3118 3119 asc = self._match(TokenType.ASC) 3120 desc = self._match(TokenType.DESC) or (asc and False) 3121 3122 is_nulls_first = self._match_text_seq("NULLS", "FIRST") 3123 is_nulls_last = self._match_text_seq("NULLS", "LAST") 3124 3125 nulls_first = is_nulls_first or False 3126 explicitly_null_ordered = is_nulls_first or is_nulls_last 3127 3128 if ( 3129 not explicitly_null_ordered 3130 and ( 3131 (not desc and self.NULL_ORDERING == "nulls_are_small") 3132 or (desc and self.NULL_ORDERING != "nulls_are_small") 3133 ) 3134 and self.NULL_ORDERING != "nulls_are_last" 3135 ): 3136 nulls_first = True 3137 3138 return self.expression(exp.Ordered, this=this, desc=desc, nulls_first=nulls_first) 3139 3140 def _parse_limit( 3141 self, this: t.Optional[exp.Expression] = None, top: bool = False 3142 ) -> t.Optional[exp.Expression]: 3143 if self._match(TokenType.TOP if top else TokenType.LIMIT): 3144 comments = self._prev_comments 3145 if top: 3146 limit_paren = self._match(TokenType.L_PAREN) 3147 expression = self._parse_number() 3148 3149 if limit_paren: 3150 self._match_r_paren() 3151 else: 3152 expression = self._parse_term() 3153 3154 if self._match(TokenType.COMMA): 3155 offset 
= expression
                expression = self._parse_term()
            else:
                offset = None

            limit_exp = self.expression(
                exp.Limit, this=this, expression=expression, offset=offset, comments=comments
            )

            return limit_exp

        if self._match(TokenType.FETCH):
            # FETCH {FIRST|NEXT} [count] [PERCENT] {ROW|ROWS} [ONLY | WITH TIES]
            direction = self._match_set((TokenType.FIRST, TokenType.NEXT))
            direction = self._prev.text if direction else "FIRST"

            count = self._parse_field(tokens=self.FETCH_TOKENS)
            percent = self._match(TokenType.PERCENT)

            self._match_set((TokenType.ROW, TokenType.ROWS))

            only = self._match_text_seq("ONLY")
            with_ties = self._match_text_seq("WITH", "TIES")

            if only and with_ties:
                self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause")

            return self.expression(
                exp.Fetch,
                direction=direction,
                count=count,
                percent=percent,
                with_ties=with_ties,
            )

        return this

    def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        """Parse an optional OFFSET clause; returns `this` unchanged when absent."""
        if not self._match(TokenType.OFFSET):
            return this

        count = self._parse_term()
        self._match_set((TokenType.ROW, TokenType.ROWS))
        return self.expression(exp.Offset, this=this, expression=count)

    def _parse_locks(self) -> t.List[exp.Lock]:
        """Parse zero or more row-locking clauses (FOR UPDATE / FOR SHARE /
        LOCK IN SHARE MODE), each optionally with OF <tables> and a wait policy
        (NOWAIT / WAIT <n> / SKIP LOCKED)."""
        locks = []
        while True:
            if self._match_text_seq("FOR", "UPDATE"):
                update = True
            elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq(
                "LOCK", "IN", "SHARE", "MODE"
            ):
                update = False
            else:
                break

            expressions = None
            if self._match_text_seq("OF"):
                expressions = self._parse_csv(lambda: self._parse_table(schema=True))

            # wait: True = NOWAIT, False = SKIP LOCKED, expression = WAIT <n>, None = unspecified
            wait: t.Optional[bool | exp.Expression] = None
            if self._match_text_seq("NOWAIT"):
                wait = True
            elif self._match_text_seq("WAIT"):
                wait = self._parse_primary()
            elif self._match_text_seq("SKIP", "LOCKED"):
                wait = False

            locks.append(
                self.expression(exp.Lock, update=update, expressions=expressions, wait=wait)
            )

        return locks

    def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse a trailing UNION/EXCEPT/INTERSECT chain; recurses so chains nest to the right."""
        if not self._match_set(self.SET_OPERATIONS):
            return this

        token_type = self._prev.token_type

        if token_type == TokenType.UNION:
            expression = exp.Union
        elif token_type == TokenType.EXCEPT:
            expression = exp.Except
        else:
            expression = exp.Intersect

        return self.expression(
            expression,
            comments=self._prev.comments,
            this=this,
            # set operations are DISTINCT unless ALL is given explicitly
            distinct=self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL),
            by_name=self._match_text_seq("BY", "NAME"),
            expression=self._parse_set_operations(self._parse_select(nested=True)),
        )

    def _parse_expression(self) -> t.Optional[exp.Expression]:
        """Parse a full expression, including an optional trailing alias."""
        return self._parse_alias(self._parse_conjunction())

    def _parse_conjunction(self) -> t.Optional[exp.Expression]:
        """Parse AND/OR-level expressions (lowest binding precedence handled here)."""
        return self._parse_tokens(self._parse_equality, self.CONJUNCTION)

    def _parse_equality(self) -> t.Optional[exp.Expression]:
        """Parse equality-level operators (=, <>, ...)."""
        return self._parse_tokens(self._parse_comparison, self.EQUALITY)

    def _parse_comparison(self) -> t.Optional[exp.Expression]:
        """Parse comparison-level operators (<, <=, >, >=, ...)."""
        return self._parse_tokens(self._parse_range, self.COMPARISON)

    def _parse_range(self) -> t.Optional[exp.Expression]:
        """Parse range-style predicates (BETWEEN, IN, LIKE, IS [NOT] NULL, ...) on top
        of a bitwise-level operand, honoring an optional leading NOT."""
        this = self._parse_bitwise()
        negate = self._match(TokenType.NOT)

        if self._match_set(self.RANGE_PARSERS):
            expression = self.RANGE_PARSERS[self._prev.token_type](self, this)
            if not expression:
                return this

            this = expression
        elif self._match(TokenType.ISNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())

        # Postgres supports ISNULL and NOTNULL for conditions.
        # https://blog.andreiavram.ro/postgresql-null-composite-type/
        if self._match(TokenType.NOTNULL):
            # x NOTNULL  ->  NOT (x IS NULL)
            this = self.expression(exp.Is, this=this, expression=exp.Null())
            this = self.expression(exp.Not, this=this)

        if negate:
            this = self.expression(exp.Not, this=this)

        if self._match(TokenType.IS):
            this = self._parse_is(this)

        return this

    def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse the remainder of an IS predicate: IS [NOT] DISTINCT FROM, IS [NOT] NULL,
        IS [NOT] TRUE/FALSE. Rewinds and returns None when nothing valid follows IS."""
        index = self._index - 1
        negate = self._match(TokenType.NOT)

        if self._match_text_seq("DISTINCT", "FROM"):
            # IS NOT DISTINCT FROM is null-safe equality; IS DISTINCT FROM its negation
            klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ
            return self.expression(klass, this=this, expression=self._parse_conjunction())

        expression = self._parse_null() or self._parse_boolean()
        if not expression:
            self._retreat(index)
            return None

        this = self.expression(exp.Is, this=this, expression=expression)
        return self.expression(exp.Not, this=this) if negate else this

    def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In:
        """Parse the right side of an IN predicate: UNNEST(...), a parenthesized
        subquery or expression list, or a bare field."""
        unnest = self._parse_unnest(with_alias=False)
        if unnest:
            this = self.expression(exp.In, this=this, unnest=unnest)
        elif self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias))

            # A single subquery is stored under "query"; anything else is a value list
            if len(expressions) == 1 and isinstance(expressions[0], exp.Subqueryable):
                this = self.expression(exp.In, this=this, query=expressions[0])
            else:
                this = self.expression(exp.In, this=this, expressions=expressions)

            self._match_r_paren(this)
        else:
            this = self.expression(exp.In, this=this, field=self._parse_field())

        return this

    def _parse_between(self, this: exp.Expression) -> exp.Between:
        """Parse `<low> AND <high>` following BETWEEN."""
        low = self._parse_bitwise()
        self._match(TokenType.AND)
        high = self._parse_bitwise()
        return self.expression(exp.Between, this=this, low=low, high=high)

    def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse an optional ESCAPE '<char>' suffix (e.g. after LIKE)."""
        if not self._match(TokenType.ESCAPE):
            return this
        return self.expression(exp.Escape, this=this, expression=self._parse_string())

    def _parse_interval(self) -> t.Optional[exp.Interval]:
        """Parse an INTERVAL literal, canonicalizing toward INTERVAL '<n>' <unit>.
        Rewinds and returns None when no valid interval follows the keyword."""
        index = self._index

        if not self._match(TokenType.INTERVAL):
            return None

        if self._match(TokenType.STRING, advance=False):
            this = self._parse_primary()
        else:
            this = self._parse_term()

        if not this:
            self._retreat(index)
            return None

        unit = self._parse_function() or self._parse_var(any_token=True)

        # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse
        # each INTERVAL expression into this canonical form so it's easy to transpile
        if this and this.is_number:
            this = exp.Literal.string(this.name)
        elif this and this.is_string:
            parts = this.name.split()

            if len(parts) == 2:
                if unit:
                    # This is not actually a unit, it's something else (e.g.
a "window side")
                    unit = None
                    self._retreat(self._index - 1)

                # '5 day' style string: split it into value and unit
                this = exp.Literal.string(parts[0])
                unit = self.expression(exp.Var, this=parts[1])

        return self.expression(exp.Interval, this=this, unit=unit)

    def _parse_bitwise(self) -> t.Optional[exp.Expression]:
        """Parse bitwise-level operators, plus ?? (coalesce) and << / >> shift pairs."""
        this = self._parse_term()

        while True:
            if self._match_set(self.BITWISE):
                this = self.expression(
                    self.BITWISE[self._prev.token_type],
                    this=this,
                    expression=self._parse_term(),
                )
            elif self._match(TokenType.DQMARK):
                this = self.expression(exp.Coalesce, this=this, expressions=self._parse_term())
            elif self._match_pair(TokenType.LT, TokenType.LT):
                this = self.expression(
                    exp.BitwiseLeftShift, this=this, expression=self._parse_term()
                )
            elif self._match_pair(TokenType.GT, TokenType.GT):
                this = self.expression(
                    exp.BitwiseRightShift, this=this, expression=self._parse_term()
                )
            else:
                break

        return this

    def _parse_term(self) -> t.Optional[exp.Expression]:
        """Parse additive-level operators (TERM token set)."""
        return self._parse_tokens(self._parse_factor, self.TERM)

    def _parse_factor(self) -> t.Optional[exp.Expression]:
        """Parse multiplicative-level operators; exponentiation binds tighter when supported."""
        if self.EXPONENT:
            return self._parse_tokens(self._parse_exponent, self.FACTOR)
        return self._parse_tokens(self._parse_unary, self.FACTOR)

    def _parse_exponent(self) -> t.Optional[exp.Expression]:
        """Parse exponentiation-level operators (dialects with EXPONENT tokens)."""
        return self._parse_tokens(self._parse_unary, self.EXPONENT)

    def _parse_unary(self) -> t.Optional[exp.Expression]:
        """Parse unary prefix operators, falling through to typed/AT TIME ZONE expressions."""
        if self._match_set(self.UNARY_PARSERS):
            return self.UNARY_PARSERS[self._prev.token_type](self)
        return self._parse_at_time_zone(self._parse_type())

    def _parse_type(self, parse_interval: bool = True) -> t.Optional[exp.Expression]:
        """Parse an interval, a `TYPE 'literal'` cast shorthand, or a plain column expression.
        Rewinds when a tentatively-parsed type turns out not to apply."""
        interval = parse_interval and self._parse_interval()
        if interval:
            return interval

        index = self._index
        data_type = self._parse_types(check_func=True, allow_identifiers=False)
        this = self._parse_column()

        if data_type:
            if isinstance(this, exp.Literal):
                parser = self.TYPE_LITERAL_PARSERS.get(data_type.this)
                if parser:
                    return parser(self, this, data_type)
                return self.expression(exp.Cast, this=this, to=data_type)
            if not data_type.expressions:
                self._retreat(index)
                return self._parse_column()
            return self._parse_column_ops(data_type)

        return this and self._parse_column_ops(this)

    def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]:
        """Parse one parameter inside a type's parentheses, e.g. the 30 in VARCHAR(30)."""
        this = self._parse_type()
        if not this:
            return None

        return self.expression(
            exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True)
        )

    def _parse_types(
        self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True
    ) -> t.Optional[exp.Expression]:
        """Parse a data type, including nested (ARRAY/MAP/STRUCT), enum, parameterized,
        and user-defined types. Rewinds and returns None on failure."""
        index = self._index

        # Teradata SYSUDTLIB. prefix
        prefix = self._match_text_seq("SYSUDTLIB", ".")

        if not self._match_set(self.TYPE_TOKENS):
            identifier = allow_identifiers and self._parse_id_var(
                any_token=False, tokens=(TokenType.VAR,)
            )

            if identifier:
                # Re-tokenize the identifier: it may spell a known type (e.g. quoted)
                tokens = self._tokenizer.tokenize(identifier.name)

                if len(tokens) != 1:
                    self.raise_error("Unexpected identifier", self._prev)

                if tokens[0].token_type in self.TYPE_TOKENS:
                    self._prev = tokens[0]
                elif self.SUPPORTS_USER_DEFINED_TYPES:
                    type_name = identifier.name

                    while self._match(TokenType.DOT):
                        type_name = f"{type_name}.{self._advance_any() and self._prev.text}"

                    return exp.DataType.build(type_name, udt=True)
                else:
                    return None
            else:
                return None

        type_token = self._prev.token_type

        if type_token == TokenType.PSEUDO_TYPE:
            return self.expression(exp.PseudoType, this=self._prev.text)

        if type_token == TokenType.OBJECT_IDENTIFIER:
            return self.expression(exp.ObjectIdentifier,
this=self._prev.text)

        nested = type_token in self.NESTED_TYPE_TOKENS
        is_struct = type_token in self.STRUCT_TYPE_TOKENS
        expressions = None
        maybe_func = False

        if self._match(TokenType.L_PAREN):
            if is_struct:
                expressions = self._parse_csv(self._parse_struct_types)
            elif nested:
                expressions = self._parse_csv(
                    lambda: self._parse_types(
                        check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                    )
                )
            elif type_token in self.ENUM_TYPE_TOKENS:
                expressions = self._parse_csv(self._parse_equality)
            else:
                expressions = self._parse_csv(self._parse_type_size)

            if not expressions or not self._match(TokenType.R_PAREN):
                # Not actually a parameterized type; rewind fully
                self._retreat(index)
                return None

            # TYPE(...) could still be a function call; verified further below
            maybe_func = True

        this: t.Optional[exp.Expression] = None
        values: t.Optional[t.List[exp.Expression]] = None

        if nested and self._match(TokenType.LT):
            # Angle-bracket syntax, e.g. ARRAY<INT> or STRUCT<a INT>
            if is_struct:
                expressions = self._parse_csv(self._parse_struct_types)
            else:
                expressions = self._parse_csv(
                    lambda: self._parse_types(
                        check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                    )
                )

            if not self._match(TokenType.GT):
                self.raise_error("Expecting >")

            if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)):
                values = self._parse_csv(self._parse_conjunction)
                self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN))

        if type_token in self.TIMESTAMPS:
            if self._match_text_seq("WITH", "TIME", "ZONE"):
                maybe_func = False
                tz_type = (
                    exp.DataType.Type.TIMETZ
                    if type_token in self.TIMES
                    else exp.DataType.Type.TIMESTAMPTZ
                )
                this = exp.DataType(this=tz_type, expressions=expressions)
            elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"):
                maybe_func = False
                this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions)
            elif self._match_text_seq("WITHOUT", "TIME", "ZONE"):
                maybe_func = False
        elif type_token == TokenType.INTERVAL:
            unit = self._parse_var()

            if self._match_text_seq("TO"):
                # INTERVAL <unit> TO <unit> range type
                span = [exp.IntervalSpan(this=unit, expression=self._parse_var())]
            else:
                span = None

            if span or not unit:
                this = self.expression(
                    exp.DataType, this=exp.DataType.Type.INTERVAL, expressions=span
                )
            else:
                this = self.expression(exp.Interval, unit=unit)

        if maybe_func and check_func:
            # TYPE(...) followed by a string literal means it was a type after all
            # (e.g. DATE('...')); otherwise treat the whole thing as a function call
            index2 = self._index
            peek = self._parse_string()

            if not peek:
                self._retreat(index)
                return None

            self._retreat(index2)

        if not this:
            if self._match_text_seq("UNSIGNED"):
                unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token)
                if not unsigned_type_token:
                    self.raise_error(f"Cannot convert {type_token.value} to unsigned.")

                type_token = unsigned_type_token or type_token

            this = exp.DataType(
                this=exp.DataType.Type[type_token.value],
                expressions=expressions,
                nested=nested,
                values=values,
                prefix=prefix,
            )

        # Trailing [] pairs wrap the type in ARRAY, e.g. INT[][]
        while self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET):
            this = exp.DataType(this=exp.DataType.Type.ARRAY, expressions=[this], nested=True)

        return this

    def _parse_struct_types(self) -> t.Optional[exp.Expression]:
        """Parse one STRUCT member: `name type` or `name: type` (interval parsing is
        disabled so a member named like a unit isn't misread)."""
        this = self._parse_type(parse_interval=False) or self._parse_id_var()
        self._match(TokenType.COLON)
        return self._parse_column_def(this)

    def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse an optional AT TIME ZONE suffix."""
        if not self._match_text_seq("AT", "TIME", "ZONE"):
            return this
        return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary())

    def _parse_column(self) -> t.Optional[exp.Expression]:
        """Parse a (possibly dotted/bracketed) column reference."""
        this = self._parse_field()
        if isinstance(this, exp.Identifier):
            this =
self.expression(exp.Column, this=this)
        elif not this:
            return self._parse_bracket(this)
        return self._parse_column_ops(this)

    def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Apply trailing column operators: ::type casts, dotted path segments, and
        bracket subscripts, left-associatively."""
        this = self._parse_bracket(this)

        while self._match_set(self.COLUMN_OPERATORS):
            op_token = self._prev.token_type
            op = self.COLUMN_OPERATORS.get(op_token)

            if op_token == TokenType.DCOLON:
                field = self._parse_types()
                if not field:
                    self.raise_error("Expected type")
            elif op and self._curr:
                self._advance()
                value = self._prev.text
                field = (
                    exp.Literal.number(value)
                    if self._prev.token_type == TokenType.NUMBER
                    else exp.Literal.string(value)
                )
            else:
                field = self._parse_field(anonymous_func=True, any_token=True)

            if isinstance(field, exp.Func):
                # bigquery allows function calls like x.y.count(...)
                # SAFE.SUBSTR(...)
                # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules
                this = self._replace_columns_with_dots(this)

            if op:
                this = op(self, this, field)
            elif isinstance(this, exp.Column) and not this.args.get("catalog"):
                # Shift the dotted parts one level: a.b.c -> catalog/db/table/column
                this = self.expression(
                    exp.Column,
                    this=field,
                    table=this.this,
                    db=this.args.get("table"),
                    catalog=this.args.get("db"),
                )
            else:
                this = self.expression(exp.Dot, this=this, expression=field)
            this = self._parse_bracket(this)
        return this

    def _parse_primary(self) -> t.Optional[exp.Expression]:
        """Parse a primary expression: a literal (with implicit adjacent-string
        concatenation), a leading-dot number, or a parenthesized expression /
        subquery / tuple. Returns None when nothing primary is present."""
        if self._match_set(self.PRIMARY_PARSERS):
            token_type = self._prev.token_type
            primary = self.PRIMARY_PARSERS[token_type](self, self._prev)

            if token_type == TokenType.STRING:
                # 'a' 'b' concatenates adjacent string literals
                expressions = [primary]
                while self._match(TokenType.STRING):
                    expressions.append(exp.Literal.string(self._prev.text))

                if len(expressions) > 1:
                    return self.expression(exp.Concat, expressions=expressions)

            return primary

        if self._match_pair(TokenType.DOT, TokenType.NUMBER):
            # .5 -> 0.5
            return exp.Literal.number(f"0.{self._prev.text}")

        if self._match(TokenType.L_PAREN):
            comments = self._prev_comments
            query = self._parse_select()

            if query:
                expressions = [query]
            else:
                expressions = self._parse_expressions()

            this = self._parse_query_modifiers(seq_get(expressions, 0))

            if isinstance(this, exp.Subqueryable):
                this = self._parse_set_operations(
                    self._parse_subquery(this=this, parse_alias=False)
                )
            elif len(expressions) > 1:
                this = self.expression(exp.Tuple, expressions=expressions)
            else:
                this = self.expression(exp.Paren, this=self._parse_set_operations(this))

            if this:
                this.add_comments(comments)

            self._match_r_paren(expression=this)
            return this

        return None

    def _parse_field(
        self,
        any_token: bool = False,
        tokens: t.Optional[t.Collection[TokenType]] = None,
        anonymous_func: bool = False,
    ) -> t.Optional[exp.Expression]:
        """Parse a field: a primary literal, a function call, or an identifier."""
        return (
            self._parse_primary()
            or self._parse_function(anonymous=anonymous_func)
            or self._parse_id_var(any_token=any_token, tokens=tokens)
        )

    def _parse_function(
        self,
        functions: t.Optional[t.Dict[str, t.Callable]] = None,
        anonymous: bool = False,
        optional_parens: bool = True,
    ) -> t.Optional[exp.Expression]:
        """Parse a function call, unwrapping the ODBC-style {fn <function>} escape."""
        # This allows us to also parse {fn <function>} syntax (Snowflake, MySQL support this)
        # See: https://community.snowflake.com/s/article/SQL-Escape-Sequences
        fn_syntax = False
        if (
            self._match(TokenType.L_BRACE, advance=False)
            and self._next
            and self._next.text.upper() == "FN"
        ):
            self._advance(2)
            fn_syntax = True

        func = self._parse_function_call(
            functions=functions, anonymous=anonymous, optional_parens=optional_parens
        )

        if fn_syntax:
            self._match(TokenType.R_BRACE)

        return func

    def _parse_function_call(
        self,
        functions: t.Optional[t.Dict[str, t.Callable]] = None,
        anonymous: bool = False,
        optional_parens: bool = True,
    ) -> t.Optional[exp.Expression]:
        """Parse a function invocation proper.

        Resolution order: no-paren special parsers, no-paren builtins
        (e.g. CURRENT_DATE), dialect FUNCTION_PARSERS, subquery predicates
        (EXISTS/ANY/...), known FUNCTIONS builders, and finally an Anonymous
        function node. A trailing OVER clause is handled by _parse_window.
        """
        if not self._curr:
            return None

        token_type = self._curr.token_type
        this = self._curr.text
        upper = this.upper()

        parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper)
        if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS:
            self._advance()
            return parser(self)

        if not self._next or self._next.token_type != TokenType.L_PAREN:
            if optional_parens and token_type in self.NO_PAREN_FUNCTIONS:
                self._advance()
                return self.expression(self.NO_PAREN_FUNCTIONS[token_type])

            return None

        if token_type not in self.FUNC_TOKENS:
            return None

        # Consume the function name and the opening paren
        self._advance(2)

        parser = self.FUNCTION_PARSERS.get(upper)
        if parser and not anonymous:
            this = parser(self)
        else:
            subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type)

            if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH):
                this = self.expression(subquery_predicate, this=self._parse_select())
                self._match_r_paren()
                return this

            if functions is None:
                functions = self.FUNCTIONS

            function = functions.get(upper)

            alias = upper in self.FUNCTIONS_WITH_ALIASED_ARGS
            args = self._parse_csv(lambda: self._parse_lambda(alias=alias))

            if function and not anonymous:
                func = self.validate_expression(function(args), args)
                if not self.NORMALIZE_FUNCTIONS:
                    # Preserve the name exactly as written when not normalizing
                    func.meta["name"] = this
                this = func
            else:
                this = self.expression(exp.Anonymous, this=this, expressions=args)

        self._match_r_paren(this)
        return self._parse_window(this)
    def _parse_function_parameter(self) -> t.Optional[exp.Expression]:
        """Parse one parameter in a function definition (name plus optional type)."""
        return self._parse_column_def(self._parse_id_var())

    def _parse_user_defined_function(
        self, kind: t.Optional[TokenType] = None
    ) -> t.Optional[exp.Expression]:
        """Parse a (possibly schema-qualified) UDF name with an optional parameter list."""
        this = self._parse_id_var()

        while self._match(TokenType.DOT):
            this = self.expression(exp.Dot, this=this, expression=self._parse_id_var())

        if not self._match(TokenType.L_PAREN):
            return this

        expressions = self._parse_csv(self._parse_function_parameter)
        self._match_r_paren()
        return self.expression(
            exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True
        )

    def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier:
        """Parse a charset introducer (e.g. _utf8'...'); falls back to a plain identifier
        when no literal follows."""
        literal = self._parse_primary()
        if literal:
            return self.expression(exp.Introducer, this=token.text, expression=literal)

        return self.expression(exp.Identifier, this=token.text)

    def _parse_session_parameter(self) -> exp.SessionParameter:
        """Parse a session parameter reference, optionally qualified as kind.name."""
        kind = None
        this = self._parse_id_var() or self._parse_primary()

        if this and self._match(TokenType.DOT):
            kind = this.name
            this = self._parse_var() or self._parse_primary()

        return self.expression(exp.SessionParameter, this=this, kind=kind)

    def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]:
        """Parse a lambda argument (x -> ..., (x, y) -> ...) or, failing that, a
        DISTINCT list / select-or-expression argument. Rewinds on failed attempts."""
        index = self._index

        if self._match(TokenType.L_PAREN):
            expressions = t.cast(
                t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_id_var)
            )

            if not self._match(TokenType.R_PAREN):
                self._retreat(index)
        else:
            expressions = [self._parse_id_var()]

        if self._match_set(self.LAMBDAS):
            return self.LAMBDAS[self._prev.token_type](self, expressions)

        # Not a lambda; rewind and parse a regular argument instead
        self._retreat(index)

        this: t.Optional[exp.Expression]

        if self._match(TokenType.DISTINCT):
            this = self.expression(
                exp.Distinct, expressions=self._parse_csv(self._parse_conjunction)
            )
        else:
            this = self._parse_select_or_expression(alias=alias)

        return self._parse_limit(self._parse_order(self._parse_respect_or_ignore_nulls(this)))

    def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        """Parse a parenthesized column/constraint list (e.g. in CREATE TABLE), unless a
        nested SELECT follows instead. Speculative parse errors are cleared."""
        index = self._index

        if not self.errors:
            try:
                if self._parse_select(nested=True):
                    return this
            except ParseError:
                pass
            finally:
                self.errors.clear()
                self._retreat(index)

        if not self._match(TokenType.L_PAREN):
            return this

        args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def())

        self._match_r_paren()
        return self.expression(exp.Schema, this=this, expressions=args)

    def _parse_field_def(self) -> t.Optional[exp.Expression]:
        """Parse one field definition inside a schema list."""
        return self._parse_column_def(self._parse_field(any_token=True))

    def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse a column definition: name, optional type, and trailing constraints.
        Returns the bare identifier when neither type nor constraints are present."""
        # column defs are not really columns, they're identifiers
        if isinstance(this, exp.Column):
            this = this.this

        kind = self._parse_types(schema=True)

        if self._match_text_seq("FOR", "ORDINALITY"):
            return self.expression(exp.ColumnDef, this=this, ordinality=True)

        constraints: t.List[exp.Expression] = []

        if not kind and self._match(TokenType.ALIAS):
            # `name AS <expr>` computed column
            constraints.append(
                self.expression(
                    exp.ComputedColumnConstraint,
                    this=self._parse_conjunction(),
                    persisted=self._match_text_seq("PERSISTED"),
                    not_null=self._match_pair(TokenType.NOT, TokenType.NULL),
                )
            )

        while True:
            constraint = self._parse_column_constraint()
            if not constraint:
                break
            constraints.append(constraint)

        if not kind and not constraints:
            return this

        return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints)

    def _parse_auto_increment(
        self,
    ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint:
        """Parse AUTO_INCREMENT, optionally with (start, increment) or START/INCREMENT."""
        start = None
        increment = None

        if self._match(TokenType.L_PAREN, advance=False):
            args = self._parse_wrapped_csv(self._parse_bitwise)
            start = seq_get(args, 0)
            increment = seq_get(args, 1)
        elif self._match_text_seq("START"):
            start = self._parse_bitwise()
            self._match_text_seq("INCREMENT")
            increment = self._parse_bitwise()

        if start and increment:
            return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment)

        return exp.AutoIncrementColumnConstraint()

    def _parse_compress(self) -> exp.CompressColumnConstraint:
        """Parse a COMPRESS column constraint with either a wrapped list or one value."""
        if self._match(TokenType.L_PAREN, advance=False):
            return self.expression(
                exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise)
            )

        return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise())

    def _parse_generated_as_identity(
        self,
    ) -> (
        exp.GeneratedAsIdentityColumnConstraint
        | exp.ComputedColumnConstraint
        | exp.GeneratedAsRowColumnConstraint
    ):
        """Parse GENERATED {ALWAYS | BY DEFAULT} AS {IDENTITY (...) | ROW ... | <expr>}."""
        if self._match_text_seq("BY", "DEFAULT"):
            on_null = self._match_pair(TokenType.ON, TokenType.NULL)
            this = self.expression(
                exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null
            )
        else:
            self._match_text_seq("ALWAYS")
            this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True)

        self._match(TokenType.ALIAS)

        if self._match_text_seq("ROW"):
            start = self._match_text_seq("START")
            if not start:
                self._match(TokenType.END)
            hidden = self._match_text_seq("HIDDEN")
            return self.expression(exp.GeneratedAsRowColumnConstraint, start=start, hidden=hidden)

        identity = self._match_text_seq("IDENTITY")

        if self._match(TokenType.L_PAREN):
3971 if self._match(TokenType.START_WITH): 3972 this.set("start", self._parse_bitwise()) 3973 if self._match_text_seq("INCREMENT", "BY"): 3974 this.set("increment", self._parse_bitwise()) 3975 if self._match_text_seq("MINVALUE"): 3976 this.set("minvalue", self._parse_bitwise()) 3977 if self._match_text_seq("MAXVALUE"): 3978 this.set("maxvalue", self._parse_bitwise()) 3979 3980 if self._match_text_seq("CYCLE"): 3981 this.set("cycle", True) 3982 elif self._match_text_seq("NO", "CYCLE"): 3983 this.set("cycle", False) 3984 3985 if not identity: 3986 this.set("expression", self._parse_bitwise()) 3987 elif not this.args.get("start") and self._match(TokenType.NUMBER, advance=False): 3988 args = self._parse_csv(self._parse_bitwise) 3989 this.set("start", seq_get(args, 0)) 3990 this.set("increment", seq_get(args, 1)) 3991 3992 self._match_r_paren() 3993 3994 return this 3995 3996 def _parse_inline(self) -> exp.InlineLengthColumnConstraint: 3997 self._match_text_seq("LENGTH") 3998 return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise()) 3999 4000 def _parse_not_constraint( 4001 self, 4002 ) -> t.Optional[exp.Expression]: 4003 if self._match_text_seq("NULL"): 4004 return self.expression(exp.NotNullColumnConstraint) 4005 if self._match_text_seq("CASESPECIFIC"): 4006 return self.expression(exp.CaseSpecificColumnConstraint, not_=True) 4007 if self._match_text_seq("FOR", "REPLICATION"): 4008 return self.expression(exp.NotForReplicationColumnConstraint) 4009 return None 4010 4011 def _parse_column_constraint(self) -> t.Optional[exp.Expression]: 4012 if self._match(TokenType.CONSTRAINT): 4013 this = self._parse_id_var() 4014 else: 4015 this = None 4016 4017 if self._match_texts(self.CONSTRAINT_PARSERS): 4018 return self.expression( 4019 exp.ColumnConstraint, 4020 this=this, 4021 kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self), 4022 ) 4023 4024 return this 4025 4026 def _parse_constraint(self) -> t.Optional[exp.Expression]: 4027 if not 
self._match(TokenType.CONSTRAINT):
            # No CONSTRAINT keyword: restrict to schema-level unnamed constraint kinds.
            return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS)

        this = self._parse_id_var()
        expressions = []

        # A named constraint may chain several constraint kinds / function calls.
        while True:
            constraint = self._parse_unnamed_constraint() or self._parse_function()
            if not constraint:
                break
            expressions.append(constraint)

        return self.expression(exp.Constraint, this=this, expressions=expressions)

    def _parse_unnamed_constraint(
        self, constraints: t.Optional[t.Collection[str]] = None
    ) -> t.Optional[exp.Expression]:
        """Parse a constraint that has no CONSTRAINT <name> prefix."""
        if self._match(TokenType.IDENTIFIER, advance=False) or not self._match_texts(
            constraints or self.CONSTRAINT_PARSERS
        ):
            return None

        constraint = self._prev.text.upper()
        if constraint not in self.CONSTRAINT_PARSERS:
            self.raise_error(f"No parser found for schema constraint {constraint}.")

        return self.CONSTRAINT_PARSERS[constraint](self)

    def _parse_unique(self) -> exp.UniqueColumnConstraint:
        """Parse UNIQUE [KEY] [(<columns>)] [USING <index type>]."""
        self._match_text_seq("KEY")
        return self.expression(
            exp.UniqueColumnConstraint,
            this=self._parse_schema(self._parse_id_var(any_token=False)),
            index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text,
        )

    def _parse_key_constraint_options(self) -> t.List[str]:
        """Collect trailing key-constraint options (ON <event> <action>, etc.) as strings."""
        options = []
        while True:
            if not self._curr:
                break

            if self._match(TokenType.ON):
                action = None
                # The token after ON names the triggering event (e.g. DELETE/UPDATE).
                on = self._advance_any() and self._prev.text

                if self._match_text_seq("NO", "ACTION"):
                    action = "NO ACTION"
                elif self._match_text_seq("CASCADE"):
                    action = "CASCADE"
                elif self._match_text_seq("RESTRICT"):
                    action = "RESTRICT"
                elif self._match_pair(TokenType.SET, TokenType.NULL):
                    action = "SET NULL"
                elif self._match_pair(TokenType.SET, TokenType.DEFAULT):
                    action = "SET DEFAULT"
                else:
                    self.raise_error("Invalid key constraint")

                # (the option string is appended in the next chunk)
                options.append(f"ON {on} {action}")
            elif self._match_text_seq("NOT", "ENFORCED"):
                options.append("NOT ENFORCED")
            elif self._match_text_seq("DEFERRABLE"):
                options.append("DEFERRABLE")
            elif self._match_text_seq("INITIALLY", "DEFERRED"):
                options.append("INITIALLY DEFERRED")
            elif self._match_text_seq("NORELY"):
                options.append("NORELY")
            elif self._match_text_seq("MATCH", "FULL"):
                options.append("MATCH FULL")
            else:
                break

        return options

    def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]:
        """Parse REFERENCES <table>[(<columns>)] plus trailing key-constraint options.

        When `match` is False the REFERENCES keyword is assumed to be already consumed.
        """
        if match and not self._match(TokenType.REFERENCES):
            return None

        expressions = None
        # schema=True lets the referenced column list be parsed as part of the table.
        this = self._parse_table(schema=True)
        options = self._parse_key_constraint_options()
        return self.expression(exp.Reference, this=this, expressions=expressions, options=options)

    def _parse_foreign_key(self) -> exp.ForeignKey:
        """Parse FOREIGN KEY (<cols>) [REFERENCES ...] [ON DELETE|UPDATE <action>]..."""
        expressions = self._parse_wrapped_id_vars()
        reference = self._parse_references()
        options = {}

        while self._match(TokenType.ON):
            if not self._match_set((TokenType.DELETE, TokenType.UPDATE)):
                self.raise_error("Expected DELETE or UPDATE")

            kind = self._prev.text.lower()

            if self._match_text_seq("NO", "ACTION"):
                action = "NO ACTION"
            elif self._match(TokenType.SET):
                self._match_set((TokenType.NULL, TokenType.DEFAULT))
                action = "SET " + self._prev.text.upper()
            else:
                # Any other single token is taken verbatim as the action.
                self._advance()
                action = self._prev.text.upper()

            options[kind] = action

        return self.expression(
            exp.ForeignKey, expressions=expressions, reference=reference, **options  # type: ignore
        )

    def _parse_primary_key_part(self) -> t.Optional[exp.Expression]:
        # Hook point: dialects can override how each PRIMARY KEY part is parsed.
        return self._parse_field()

    def _parse_period_for_system_time(self) -> exp.PeriodForSystemTimeConstraint:
        """Parse PERIOD FOR SYSTEM_TIME (<start column>, <end column>)."""
        self._match(TokenType.TIMESTAMP_SNAPSHOT)

        # (assignment continues in the next chunk)
        id_vars =
self._parse_wrapped_id_vars()
        return self.expression(
            exp.PeriodForSystemTimeConstraint,
            this=seq_get(id_vars, 0),
            expression=seq_get(id_vars, 1),
        )

    def _parse_primary_key(
        self, wrapped_optional: bool = False, in_props: bool = False
    ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey:
        """Parse PRIMARY KEY as either a column constraint or a table-level key."""
        desc = (
            self._match_set((TokenType.ASC, TokenType.DESC))
            and self._prev.token_type == TokenType.DESC
        )

        # Without a following column list this is a single-column constraint.
        if not in_props and not self._match(TokenType.L_PAREN, advance=False):
            return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc)

        expressions = self._parse_wrapped_csv(
            self._parse_primary_key_part, optional=wrapped_optional
        )
        options = self._parse_key_constraint_options()
        return self.expression(exp.PrimaryKey, expressions=expressions, options=options)

    def _parse_bracket(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse [...] subscripts/slices and {...} struct literals following `this`."""
        if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)):
            return this

        bracket_kind = self._prev.token_type

        if self._match(TokenType.COLON):
            # Leading-colon slice, e.g. [:x].
            expressions: t.List[exp.Expression] = [
                self.expression(exp.Slice, expression=self._parse_conjunction())
            ]
        else:
            expressions = self._parse_csv(
                lambda: self._parse_slice(
                    self._parse_alias(self._parse_conjunction(), explicit=True)
                )
            )

        if not self._match(TokenType.R_BRACKET) and bracket_kind == TokenType.L_BRACKET:
            self.raise_error("Expected ]")
        elif not self._match(TokenType.R_BRACE) and bracket_kind == TokenType.L_BRACE:
            self.raise_error("Expected }")

        # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs
        if bracket_kind == TokenType.L_BRACE:
            this = self.expression(exp.Struct, expressions=expressions)
        elif not this or this.name.upper() == "ARRAY":
            this = self.expression(exp.Array, expressions=expressions)
        else:
            # Plain subscript: normalize indices to the dialect's index offset.
            expressions = apply_index_offset(this, expressions, -self.INDEX_OFFSET)
            this = self.expression(exp.Bracket, this=this, expressions=expressions)

        self._add_comments(this)
        # Recurse to consume chained subscripts, e.g. x[0][1].
        return self._parse_bracket(this)

    def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Upgrade `this` to a Slice expression if a ':' follows."""
        if self._match(TokenType.COLON):
            return self.expression(exp.Slice, this=this, expression=self._parse_conjunction())
        return this

    def _parse_case(self) -> t.Optional[exp.Expression]:
        """Parse CASE [<operand>] WHEN ... THEN ... [ELSE ...] END."""
        ifs = []
        default = None

        comments = self._prev_comments
        expression = self._parse_conjunction()

        while self._match(TokenType.WHEN):
            this = self._parse_conjunction()
            self._match(TokenType.THEN)
            then = self._parse_conjunction()
            ifs.append(self.expression(exp.If, this=this, true=then))

        if self._match(TokenType.ELSE):
            default = self._parse_conjunction()

        if not self._match(TokenType.END):
            self.raise_error("Expected END after CASE", self._prev)

        # A window clause may directly follow the CASE expression.
        return self._parse_window(
            self.expression(exp.Case, comments=comments, this=expression, ifs=ifs, default=default)
        )

    def _parse_if(self) -> t.Optional[exp.Expression]:
        """Parse IF: either the function form IF(...) or IF ... THEN ... [ELSE ...] END."""
        if self._match(TokenType.L_PAREN):
            args = self._parse_csv(self._parse_conjunction)
            this = self.validate_expression(exp.If.from_arg_list(args), args)
            self._match_r_paren()
        else:
            index = self._index - 1
            condition = self._parse_conjunction()

            if not condition:
                # Not an IF expression after all: rewind to before the IF token.
                self._retreat(index)
                return None

            self._match(TokenType.THEN)
            true = self._parse_conjunction()
            false = self._parse_conjunction() if self._match(TokenType.ELSE) else None
            self._match(TokenType.END)
            this = self.expression(exp.If, this=condition, true=true, false=false)

        return self._parse_window(this)

    def _parse_next_value_for(self) -> t.Optional[exp.Expression]:
        # (condition continues in the next chunk)
        if not
self._match_text_seq("VALUE", "FOR"):
            # NEXT was consumed by the caller; undo it since this isn't NEXT VALUE FOR.
            self._retreat(self._index - 1)
            return None

        return self.expression(
            exp.NextValueFor,
            this=self._parse_column(),
            order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order),
        )

    def _parse_extract(self) -> exp.Extract:
        """Parse EXTRACT(<part> FROM <expr>); a comma is accepted instead of FROM."""
        this = self._parse_function() or self._parse_var() or self._parse_type()

        if self._match(TokenType.FROM):
            return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

        if not self._match(TokenType.COMMA):
            self.raise_error("Expected FROM or comma after EXTRACT", self._prev)

        return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

    def _parse_any_value(self) -> exp.AnyValue:
        """Parse ANY_VALUE(<expr> [HAVING MAX|MIN <column>])."""
        this = self._parse_lambda()
        is_max = None
        having = None

        if self._match(TokenType.HAVING):
            self._match_texts(("MAX", "MIN"))
            is_max = self._prev.text == "MAX"
            having = self._parse_column()

        return self.expression(exp.AnyValue, this=this, having=having, max=is_max)

    def _parse_cast(self, strict: bool, safe: t.Optional[bool] = None) -> exp.Expression:
        """Parse the interior of CAST(<expr> AS <type> [FORMAT <fmt>])."""
        this = self._parse_conjunction()

        if not self._match(TokenType.ALIAS):
            if self._match(TokenType.COMMA):
                # CAST(x, 'type'): cast-to-string-type variant.
                return self.expression(exp.CastToStrType, this=this, to=self._parse_string())

            self.raise_error("Expected AS after CAST")

        fmt = None
        to = self._parse_types()

        if self._match(TokenType.FORMAT):
            fmt_string = self._parse_string()
            fmt = self._parse_at_time_zone(fmt_string)

            if not to:
                to = exp.DataType.build(exp.DataType.Type.UNKNOWN)
            if to.this in exp.DataType.TEMPORAL_TYPES:
                # A FORMAT on a temporal cast becomes an explicit string-to-date/time.
                this = self.expression(
                    exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime,
                    this=this,
                    format=exp.Literal.string(
                        format_time(
                            fmt_string.this if fmt_string else "",
                            self.FORMAT_MAPPING or self.TIME_MAPPING,
                            self.FORMAT_TRIE or self.TIME_TRIE,
                        )
                    ),
                )

                if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime):
                    this.set("zone", fmt.args["zone"])
                return this
        elif not to:
            self.raise_error("Expected TYPE after CAST")
        elif isinstance(to, exp.Identifier):
            # Unknown type name: treat it as a user-defined type.
            to = exp.DataType.build(to.name, udt=True)
        elif to.this == exp.DataType.Type.CHAR:
            if self._match(TokenType.CHARACTER_SET):
                to = self.expression(exp.CharacterSet, this=self._parse_var_or_string())

        return self.expression(
            exp.Cast if strict else exp.TryCast, this=this, to=to, format=fmt, safe=safe
        )

    def _parse_concat(self) -> t.Optional[exp.Expression]:
        """Parse CONCAT's argument list into a Concat/SafeConcat expression."""
        args = self._parse_csv(self._parse_conjunction)
        if self.CONCAT_NULL_OUTPUTS_STRING:
            args = self._ensure_string_if_null(args)

        # Some dialects (e.g. Trino) don't allow a single-argument CONCAT call, so when
        # we find such a call we replace it with its argument.
        if len(args) == 1:
            return args[0]

        return self.expression(
            exp.Concat if self.STRICT_STRING_CONCAT else exp.SafeConcat, expressions=args
        )

    def _parse_concat_ws(self) -> t.Optional[exp.Expression]:
        """Parse CONCAT_WS(<delimiter>, <args>...)."""
        args = self._parse_csv(self._parse_conjunction)
        if len(args) < 2:
            # Not enough arguments to split off a delimiter; keep them as-is.
            return self.expression(exp.ConcatWs, expressions=args)
        delim, *values = args
        if self.CONCAT_NULL_OUTPUTS_STRING:
            values = self._ensure_string_if_null(values)

        return self.expression(exp.ConcatWs, expressions=[delim] + values)

    def _parse_string_agg(self) -> exp.Expression:
        """Parse STRING_AGG / GROUP_CONCAT style aggregate arguments."""
        if self._match(TokenType.DISTINCT):
            args: t.List[t.Optional[exp.Expression]] = [
                self.expression(exp.Distinct, expressions=[self._parse_conjunction()])
            ]
            if self._match(TokenType.COMMA):
                args.extend(self._parse_csv(self._parse_conjunction))
        else:
            args = self._parse_csv(self._parse_conjunction)  # type: ignore

        index = self._index
        if not self._match(TokenType.R_PAREN) and args:
            # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]])
            # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n])
            args[-1] = self._parse_limit(this=self._parse_order(this=args[-1]))
            return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1))

        # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]).
        # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that
        # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them.
        if not self._match_text_seq("WITHIN", "GROUP"):
            self._retreat(index)
            return self.validate_expression(exp.GroupConcat.from_arg_list(args), args)

        self._match_l_paren()  # The corresponding match_r_paren will be called in parse_function (caller)
        order = self._parse_order(this=seq_get(args, 0))
        return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1))

    def _parse_convert(
        self, strict: bool, safe: t.Optional[bool] = None
    ) -> t.Optional[exp.Expression]:
        """Parse CONVERT arguments: <expr> USING <charset> or <expr>, <type>."""
        this = self._parse_bitwise()

        if self._match(TokenType.USING):
            to: t.Optional[exp.Expression] = self.expression(
                exp.CharacterSet, this=self._parse_var()
            )
        elif self._match(TokenType.COMMA):
            to = self._parse_types()
        else:
            to = None

        return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, safe=safe)

    def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]:
        """
        There are generally two variants of the DECODE function:

        - DECODE(bin, charset)
        - DECODE(expression, search, result [, search, result] ... [, default])

        The second variant will always be parsed into a CASE expression. Note that NULL
        needs special treatment, since we need to explicitly check for it with `IS NULL`,
        instead of relying on pattern matching.
4408 """ 4409 args = self._parse_csv(self._parse_conjunction) 4410 4411 if len(args) < 3: 4412 return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1)) 4413 4414 expression, *expressions = args 4415 if not expression: 4416 return None 4417 4418 ifs = [] 4419 for search, result in zip(expressions[::2], expressions[1::2]): 4420 if not search or not result: 4421 return None 4422 4423 if isinstance(search, exp.Literal): 4424 ifs.append( 4425 exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result) 4426 ) 4427 elif isinstance(search, exp.Null): 4428 ifs.append( 4429 exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result) 4430 ) 4431 else: 4432 cond = exp.or_( 4433 exp.EQ(this=expression.copy(), expression=search), 4434 exp.and_( 4435 exp.Is(this=expression.copy(), expression=exp.Null()), 4436 exp.Is(this=search.copy(), expression=exp.Null()), 4437 copy=False, 4438 ), 4439 copy=False, 4440 ) 4441 ifs.append(exp.If(this=cond, true=result)) 4442 4443 return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None) 4444 4445 def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]: 4446 self._match_text_seq("KEY") 4447 key = self._parse_column() 4448 self._match_set((TokenType.COLON, TokenType.COMMA)) 4449 self._match_text_seq("VALUE") 4450 value = self._parse_bitwise() 4451 4452 if not key and not value: 4453 return None 4454 return self.expression(exp.JSONKeyValue, this=key, expression=value) 4455 4456 def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4457 if not this or not self._match_text_seq("FORMAT", "JSON"): 4458 return this 4459 4460 return self.expression(exp.FormatJson, this=this) 4461 4462 def _parse_on_handling(self, on: str, *values: str) -> t.Optional[str]: 4463 # Parses the "X ON Y" syntax, i.e. 
        # NULL ON NULL (Oracle, T-SQL)
        for value in values:
            if self._match_text_seq(value, "ON", on):
                return f"{value} ON {on}"

        return None

    def _parse_json_object(self) -> exp.JSONObject:
        """Parse the interior of JSON_OBJECT(...)."""
        star = self._parse_star()
        expressions = (
            [star]
            if star
            else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value()))
        )
        null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT")

        unique_keys = None
        if self._match_text_seq("WITH", "UNIQUE"):
            unique_keys = True
        elif self._match_text_seq("WITHOUT", "UNIQUE"):
            unique_keys = False

        self._match_text_seq("KEYS")

        return_type = self._match_text_seq("RETURNING") and self._parse_format_json(
            self._parse_type()
        )
        encoding = self._match_text_seq("ENCODING") and self._parse_var()

        return self.expression(
            exp.JSONObject,
            expressions=expressions,
            null_handling=null_handling,
            unique_keys=unique_keys,
            return_type=return_type,
            encoding=encoding,
        )

    # Note: this is currently incomplete; it only implements the "JSON_value_column" part
    def _parse_json_column_def(self) -> exp.JSONColumnDef:
        if not self._match_text_seq("NESTED"):
            this = self._parse_id_var()
            kind = self._parse_types(allow_identifiers=False)
            nested = None
        else:
            this = None
            kind = None
            nested = True

        path = self._match_text_seq("PATH") and self._parse_string()
        nested_schema = nested and self._parse_json_schema()

        return self.expression(
            exp.JSONColumnDef,
            this=this,
            kind=kind,
            path=path,
            nested_schema=nested_schema,
        )

    def _parse_json_schema(self) -> exp.JSONSchema:
        """Parse COLUMNS (<json column defs>)."""
        self._match_text_seq("COLUMNS")
        return self.expression(
            exp.JSONSchema,
            expressions=self._parse_wrapped_csv(self._parse_json_column_def, optional=True),
        )

    # (return annotation continues in the next chunk)
    def _parse_json_table(self) ->
exp.JSONTable:
        # Parse the interior of JSON_TABLE(<expr> [, <path>] [... ON ERROR|EMPTY] COLUMNS(...)).
        this = self._parse_format_json(self._parse_bitwise())
        path = self._match(TokenType.COMMA) and self._parse_string()
        error_handling = self._parse_on_handling("ERROR", "ERROR", "NULL")
        empty_handling = self._parse_on_handling("EMPTY", "ERROR", "NULL")
        schema = self._parse_json_schema()

        return exp.JSONTable(
            this=this,
            schema=schema,
            path=path,
            error_handling=error_handling,
            empty_handling=empty_handling,
        )

    def _parse_logarithm(self) -> exp.Func:
        """Parse LOG arguments, normalizing base/expression order per dialect."""
        # Default argument order is base, expression
        args = self._parse_csv(self._parse_range)

        if len(args) > 1:
            if not self.LOG_BASE_FIRST:
                args.reverse()
            return exp.Log.from_arg_list(args)

        return self.expression(
            exp.Ln if self.LOG_DEFAULTS_TO_LN else exp.Log, this=seq_get(args, 0)
        )

    def _parse_match_against(self) -> exp.MatchAgainst:
        """Parse MATCH (<cols>) AGAINST (<expr> [<search modifier>])."""
        expressions = self._parse_csv(self._parse_column)

        self._match_text_seq(")", "AGAINST", "(")

        this = self._parse_string()

        if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"):
            modifier = "IN NATURAL LANGUAGE MODE"
            if self._match_text_seq("WITH", "QUERY", "EXPANSION"):
                modifier = f"{modifier} WITH QUERY EXPANSION"
        elif self._match_text_seq("IN", "BOOLEAN", "MODE"):
            modifier = "IN BOOLEAN MODE"
        elif self._match_text_seq("WITH", "QUERY", "EXPANSION"):
            modifier = "WITH QUERY EXPANSION"
        else:
            modifier = None

        return self.expression(
            exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier
        )

    # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16
    def _parse_open_json(self) -> exp.OpenJSON:
        this = self._parse_bitwise()
        path = self._match(TokenType.COMMA) and self._parse_string()

        def _parse_open_json_column_def() -> exp.OpenJSONColumnDef:
            # (assignment continues in the next chunk)
            this =
self._parse_field(any_token=True)
            kind = self._parse_types()
            path = self._parse_string()
            as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON)

            return self.expression(
                exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json
            )

        expressions = None
        if self._match_pair(TokenType.R_PAREN, TokenType.WITH):
            self._match_l_paren()
            expressions = self._parse_csv(_parse_open_json_column_def)

        return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions)

    def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition:
        """Parse POSITION/STRPOS arguments in either needle/haystack order."""
        args = self._parse_csv(self._parse_bitwise)

        if self._match(TokenType.IN):
            # POSITION(<substr> IN <string>) form.
            return self.expression(
                exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0)
            )

        if haystack_first:
            haystack = seq_get(args, 0)
            needle = seq_get(args, 1)
        else:
            needle = seq_get(args, 0)
            haystack = seq_get(args, 1)

        return self.expression(
            exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2)
        )

    def _parse_predict(self) -> exp.Predict:
        """Parse PREDICT(MODEL <table>, TABLE <table> [, <params struct>])."""
        self._match_text_seq("MODEL")
        this = self._parse_table()

        self._match(TokenType.COMMA)
        self._match_text_seq("TABLE")

        return self.expression(
            exp.Predict,
            this=this,
            expression=self._parse_table(),
            params_struct=self._match(TokenType.COMMA) and self._parse_bitwise(),
        )

    def _parse_join_hint(self, func_name: str) -> exp.JoinHint:
        """Parse a join hint's table list into a JoinHint node."""
        args = self._parse_csv(self._parse_table)
        return exp.JoinHint(this=func_name.upper(), expressions=args)

    def _parse_substring(self) -> exp.Substring:
        # Postgres supports the form: substring(string [from int] [for int])
        # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6

        args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise))
        if self._match(TokenType.FROM):
            args.append(self._parse_bitwise())
        if self._match(TokenType.FOR):
            args.append(self._parse_bitwise())

        return self.validate_expression(exp.Substring.from_arg_list(args), args)

    def _parse_trim(self) -> exp.Trim:
        # https://www.w3resource.com/sql/character-functions/trim.php
        # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html

        position = None
        collation = None
        expression = None

        if self._match_texts(self.TRIM_TYPES):
            position = self._prev.text.upper()

        this = self._parse_bitwise()
        if self._match_set((TokenType.FROM, TokenType.COMMA)):
            # With FROM (or pattern-first dialects) the operands arrive swapped.
            invert_order = self._prev.token_type == TokenType.FROM or self.TRIM_PATTERN_FIRST
            expression = self._parse_bitwise()

            if invert_order:
                this, expression = expression, this

        if self._match(TokenType.COLLATE):
            collation = self._parse_bitwise()

        return self.expression(
            exp.Trim, this=this, position=position, expression=expression, collation=collation
        )

    def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]:
        """Parse WINDOW <named window> [, ...]."""
        return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window)

    def _parse_named_window(self) -> t.Optional[exp.Expression]:
        """Parse one <name> AS (<window spec>) entry."""
        return self._parse_window(self._parse_id_var(), alias=True)

    def _parse_respect_or_ignore_nulls(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        """Wrap `this` in IgnoreNulls/RespectNulls when those keywords follow."""
        if self._match_text_seq("IGNORE", "NULLS"):
            return self.expression(exp.IgnoreNulls, this=this)
        if self._match_text_seq("RESPECT", "NULLS"):
            return self.expression(exp.RespectNulls, this=this)
        return this

    def _parse_window(
        self, this: t.Optional[exp.Expression], alias: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse FILTER / WITHIN GROUP / OVER window syntax following a function call."""
        if self._match_pair(TokenType.FILTER, TokenType.L_PAREN):
            self._match(TokenType.WHERE)
            # (assignment continues in the next chunk)
            this =
self.expression(
                exp.Filter, this=this, expression=self._parse_where(skip_where_token=True)
            )
            self._match_r_paren()

        # T-SQL allows the OVER (...) syntax after WITHIN GROUP.
        # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16
        if self._match_text_seq("WITHIN", "GROUP"):
            order = self._parse_wrapped(self._parse_order)
            this = self.expression(exp.WithinGroup, this=this, expression=order)

        # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER
        # Some dialects choose to implement and some do not.
        # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html

        # There is some code above in _parse_lambda that handles
        # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ...

        # The below changes handle
        # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ...

        # Oracle allows both formats
        # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html)
        # and Snowflake chose to do the same for familiarity
        # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes
        this = self._parse_respect_or_ignore_nulls(this)

        # bigquery select from window x AS (partition by ...)
        if alias:
            over = None
            self._match(TokenType.ALIAS)
        elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS):
            # No OVER-like token: `this` is not windowed.
            return this
        else:
            over = self._prev.text.upper()

        if not self._match(TokenType.L_PAREN):
            # OVER <window name> without a parenthesized spec.
            return self.expression(
                exp.Window, this=this, alias=self._parse_id_var(False), over=over
            )

        window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS)

        first = self._match(TokenType.FIRST)
        if self._match_text_seq("LAST"):
            first = False

        partition, order = self._parse_partition_and_order()
        kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text

        if kind:
            # ROWS/RANGE frame: parse both bounds.
            self._match(TokenType.BETWEEN)
            start = self._parse_window_spec()
            self._match(TokenType.AND)
            end = self._parse_window_spec()

            spec = self.expression(
                exp.WindowSpec,
                kind=kind,
                start=start["value"],
                start_side=start["side"],
                end=end["value"],
                end_side=end["side"],
            )
        else:
            spec = None

        self._match_r_paren()

        window = self.expression(
            exp.Window,
            this=this,
            partition_by=partition,
            order=order,
            spec=spec,
            alias=window_alias,
            over=over,
            first=first,
        )

        # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...)
        if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False):
            # Chained window syntax: recurse with the window as the new operand.
            return self._parse_window(window, alias=alias)

        return window

    def _parse_partition_and_order(
        self,
    ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]:
        """Parse the PARTITION BY and ORDER BY parts of a window spec."""
        return self._parse_partition_by(), self._parse_order()

    def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]:
        """Parse one window-frame bound into a {"value": ..., "side": ...} dict."""
        self._match(TokenType.BETWEEN)

        return {
            "value": (
                (self._match_text_seq("UNBOUNDED") and "UNBOUNDED")
                or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW")
                or self._parse_bitwise()
            ),
            "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text,
        }

    def _parse_alias(
        self, this: t.Optional[exp.Expression], explicit: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse [AS] <alias> or [AS] (<alias list>) following `this`.

        When `explicit` is True the AS keyword is required.
        """
        any_token = self._match(TokenType.ALIAS)

        if explicit and not any_token:
            return this

        if self._match(TokenType.L_PAREN):
            aliases = self.expression(
                exp.Aliases,
                this=this,
                expressions=self._parse_csv(lambda: self._parse_id_var(any_token)),
            )
            self._match_r_paren(aliases)
            return aliases

        alias = self._parse_id_var(any_token)

        if alias:
            return self.expression(exp.Alias, this=this, alias=alias)

        return this

    def _parse_id_var(
        self,
        any_token: bool = True,
        tokens: t.Optional[t.Collection[TokenType]] = None,
    ) -> t.Optional[exp.Expression]:
        """Parse an identifier-like token into an Identifier expression."""
        identifier = self._parse_identifier()

        if identifier:
            return identifier

        if (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS):
            quoted = self._prev.token_type == TokenType.STRING
            return exp.Identifier(this=self._prev.text, quoted=quoted)

        return None

    def _parse_string(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.STRING):
            # (return value continues in the next chunk)
            return
self.PRIMARY_PARSERS[TokenType.STRING](self, self._prev)
        return self._parse_placeholder()

    def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]:
        """Parse a string literal as a quoted identifier."""
        return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True)

    def _parse_number(self) -> t.Optional[exp.Expression]:
        """Parse a number literal, falling back to a placeholder."""
        if self._match(TokenType.NUMBER):
            return self.PRIMARY_PARSERS[TokenType.NUMBER](self, self._prev)
        return self._parse_placeholder()

    def _parse_identifier(self) -> t.Optional[exp.Expression]:
        """Parse a quoted identifier, falling back to a placeholder."""
        if self._match(TokenType.IDENTIFIER):
            return self.expression(exp.Identifier, this=self._prev.text, quoted=True)
        return self._parse_placeholder()

    def _parse_var(
        self, any_token: bool = False, tokens: t.Optional[t.Collection[TokenType]] = None
    ) -> t.Optional[exp.Expression]:
        """Parse a VAR token (or one of `tokens`) into a Var expression."""
        if (
            (any_token and self._advance_any())
            or self._match(TokenType.VAR)
            or (self._match_set(tokens) if tokens else False)
        ):
            return self.expression(exp.Var, this=self._prev.text)
        return self._parse_placeholder()

    def _advance_any(self) -> t.Optional[Token]:
        """Advance past the current token unless it is a reserved keyword."""
        if self._curr and self._curr.token_type not in self.RESERVED_KEYWORDS:
            self._advance()
            return self._prev
        return None

    def _parse_var_or_string(self) -> t.Optional[exp.Expression]:
        return self._parse_var() or self._parse_string()

    def _parse_null(self) -> t.Optional[exp.Expression]:
        """Parse a NULL token, falling back to a placeholder."""
        if self._match_set(self.NULL_TOKENS):
            return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev)
        return self._parse_placeholder()

    def _parse_boolean(self) -> t.Optional[exp.Expression]:
        """Parse TRUE/FALSE, falling back to a placeholder."""
        if self._match(TokenType.TRUE):
            return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev)
        if self._match(TokenType.FALSE):
            return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev)
        return self._parse_placeholder()

    # (return annotation continues in the next chunk)
    def _parse_star(self) ->
t.Optional[exp.Expression]:
        if self._match(TokenType.STAR):
            return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev)
        return self._parse_placeholder()

    def _parse_parameter(self) -> exp.Parameter:
        """Parse a parameter reference, optionally wrapped in braces with a ':' part."""
        def _parse_parameter_part() -> t.Optional[exp.Expression]:
            return (
                self._parse_identifier() or self._parse_primary() or self._parse_var(any_token=True)
            )

        self._match(TokenType.L_BRACE)
        this = _parse_parameter_part()
        expression = self._match(TokenType.COLON) and _parse_parameter_part()
        self._match(TokenType.R_BRACE)

        return self.expression(exp.Parameter, this=this, expression=expression)

    def _parse_placeholder(self) -> t.Optional[exp.Expression]:
        """Parse a placeholder token; rewind if its parser yields nothing."""
        if self._match_set(self.PLACEHOLDER_PARSERS):
            placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self)
            if placeholder:
                return placeholder
            self._advance(-1)
        return None

    def _parse_except(self) -> t.Optional[t.List[exp.Expression]]:
        """Parse EXCEPT (<cols>) or EXCEPT <col> as a star modifier."""
        if not self._match(TokenType.EXCEPT):
            return None
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_column)

        except_column = self._parse_column()
        return [except_column] if except_column else None

    def _parse_replace(self) -> t.Optional[t.List[exp.Expression]]:
        """Parse REPLACE (<exprs>) or REPLACE <expr> as a star modifier."""
        if not self._match(TokenType.REPLACE):
            return None
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_expression)

        replace_expression = self._parse_expression()
        return [replace_expression] if replace_expression else None

    def _parse_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA
    ) -> t.List[exp.Expression]:
        """Parse a `sep`-delimited list, applying `parse_method` to each item."""
        parse_result = parse_method()
        items = [parse_result] if parse_result is not None else []

        while self._match(sep):
            self._add_comments(parse_result)
            # (assignment continues in the next chunk)
            parse_result =
parse_method() 4944 if parse_result is not None: 4945 items.append(parse_result) 4946 4947 return items 4948 4949 def _parse_tokens( 4950 self, parse_method: t.Callable, expressions: t.Dict 4951 ) -> t.Optional[exp.Expression]: 4952 this = parse_method() 4953 4954 while self._match_set(expressions): 4955 this = self.expression( 4956 expressions[self._prev.token_type], 4957 this=this, 4958 comments=self._prev_comments, 4959 expression=parse_method(), 4960 ) 4961 4962 return this 4963 4964 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]: 4965 return self._parse_wrapped_csv(self._parse_id_var, optional=optional) 4966 4967 def _parse_wrapped_csv( 4968 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False 4969 ) -> t.List[exp.Expression]: 4970 return self._parse_wrapped( 4971 lambda: self._parse_csv(parse_method, sep=sep), optional=optional 4972 ) 4973 4974 def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any: 4975 wrapped = self._match(TokenType.L_PAREN) 4976 if not wrapped and not optional: 4977 self.raise_error("Expecting (") 4978 parse_result = parse_method() 4979 if wrapped: 4980 self._match_r_paren() 4981 return parse_result 4982 4983 def _parse_expressions(self) -> t.List[exp.Expression]: 4984 return self._parse_csv(self._parse_expression) 4985 4986 def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]: 4987 return self._parse_select() or self._parse_set_operations( 4988 self._parse_expression() if alias else self._parse_conjunction() 4989 ) 4990 4991 def _parse_ddl_select(self) -> t.Optional[exp.Expression]: 4992 return self._parse_query_modifiers( 4993 self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False)) 4994 ) 4995 4996 def _parse_transaction(self) -> exp.Transaction | exp.Command: 4997 this = None 4998 if self._match_texts(self.TRANSACTION_KIND): 4999 this = self._prev.text 5000 5001 
self._match_texts({"TRANSACTION", "WORK"}) 5002 5003 modes = [] 5004 while True: 5005 mode = [] 5006 while self._match(TokenType.VAR): 5007 mode.append(self._prev.text) 5008 5009 if mode: 5010 modes.append(" ".join(mode)) 5011 if not self._match(TokenType.COMMA): 5012 break 5013 5014 return self.expression(exp.Transaction, this=this, modes=modes) 5015 5016 def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback: 5017 chain = None 5018 savepoint = None 5019 is_rollback = self._prev.token_type == TokenType.ROLLBACK 5020 5021 self._match_texts({"TRANSACTION", "WORK"}) 5022 5023 if self._match_text_seq("TO"): 5024 self._match_text_seq("SAVEPOINT") 5025 savepoint = self._parse_id_var() 5026 5027 if self._match(TokenType.AND): 5028 chain = not self._match_text_seq("NO") 5029 self._match_text_seq("CHAIN") 5030 5031 if is_rollback: 5032 return self.expression(exp.Rollback, savepoint=savepoint) 5033 5034 return self.expression(exp.Commit, chain=chain) 5035 5036 def _parse_add_column(self) -> t.Optional[exp.Expression]: 5037 if not self._match_text_seq("ADD"): 5038 return None 5039 5040 self._match(TokenType.COLUMN) 5041 exists_column = self._parse_exists(not_=True) 5042 expression = self._parse_field_def() 5043 5044 if expression: 5045 expression.set("exists", exists_column) 5046 5047 # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns 5048 if self._match_texts(("FIRST", "AFTER")): 5049 position = self._prev.text 5050 column_position = self.expression( 5051 exp.ColumnPosition, this=self._parse_column(), position=position 5052 ) 5053 expression.set("position", column_position) 5054 5055 return expression 5056 5057 def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]: 5058 drop = self._match(TokenType.DROP) and self._parse_drop() 5059 if drop and not isinstance(drop, exp.Command): 5060 drop.set("kind", drop.args.get("kind", "COLUMN")) 5061 return drop 5062 5063 # 
# https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html
def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition:
    """Parse DROP [IF EXISTS] PARTITION (...)[, PARTITION (...), ...]."""
    return self.expression(
        exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists
    )

def _parse_add_constraint(self) -> exp.AddConstraint:
    """Parse an ADD CONSTRAINT / FOREIGN KEY / PRIMARY KEY action.

    The triggering token type is in self._prev when this is called.
    """
    this = None
    kind = self._prev.token_type

    if kind == TokenType.CONSTRAINT:
        this = self._parse_id_var()

        if self._match_text_seq("CHECK"):
            expression = self._parse_wrapped(self._parse_conjunction)
            enforced = self._match_text_seq("ENFORCED")

            return self.expression(
                exp.AddConstraint, this=this, expression=expression, enforced=enforced
            )

    if kind == TokenType.FOREIGN_KEY or self._match(TokenType.FOREIGN_KEY):
        expression = self._parse_foreign_key()
    elif kind == TokenType.PRIMARY_KEY or self._match(TokenType.PRIMARY_KEY):
        expression = self._parse_primary_key()
    else:
        expression = None

    return self.expression(exp.AddConstraint, this=this, expression=expression)

def _parse_alter_table_add(self) -> t.List[exp.Expression]:
    """Parse the ADD clause of ALTER TABLE (constraints or column definitions)."""
    index = self._index - 1

    if self._match_set(self.ADD_CONSTRAINT_TOKENS):
        return self._parse_csv(self._parse_add_constraint)

    self._retreat(index)
    if not self.ALTER_TABLE_ADD_COLUMN_KEYWORD and self._match_text_seq("ADD"):
        return self._parse_wrapped_csv(self._parse_field_def, optional=True)
    return self._parse_wrapped_csv(self._parse_add_column, optional=True)

def _parse_alter_table_alter(self) -> exp.AlterColumn:
    """Parse ALTER [COLUMN] col {DROP DEFAULT | SET DEFAULT expr | [SET DATA] TYPE ...}."""
    self._match(TokenType.COLUMN)
    column = self._parse_field(any_token=True)

    if self._match_pair(TokenType.DROP, TokenType.DEFAULT):
        return self.expression(exp.AlterColumn, this=column, drop=True)
    if self._match_pair(TokenType.SET, TokenType.DEFAULT):
        return self.expression(exp.AlterColumn, this=column, default=self._parse_conjunction())

    self._match_text_seq("SET", "DATA")
    return self.expression(
        exp.AlterColumn,
        this=column,
        dtype=self._match_text_seq("TYPE") and self._parse_types(),
        collate=self._match(TokenType.COLLATE) and self._parse_term(),
        using=self._match(TokenType.USING) and self._parse_conjunction(),
    )

def _parse_alter_table_drop(self) -> t.List[exp.Expression]:
    """Parse the DROP clause of ALTER TABLE (partitions or columns)."""
    index = self._index - 1

    partition_exists = self._parse_exists()
    if self._match(TokenType.PARTITION, advance=False):
        return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists))

    self._retreat(index)
    return self._parse_csv(self._parse_drop_column)

def _parse_alter_table_rename(self) -> exp.RenameTable:
    """Parse RENAME TO <table>."""
    self._match_text_seq("TO")
    return self.expression(exp.RenameTable, this=self._parse_table(schema=True))

def _parse_alter(self) -> exp.AlterTable | exp.Command:
    """Parse ALTER TABLE; unrecognized forms fall back to a raw Command."""
    start = self._prev

    if not self._match(TokenType.TABLE):
        return self._parse_as_command(start)

    exists = self._parse_exists()
    only = self._match_text_seq("ONLY")
    this = self._parse_table(schema=True)

    if self._next:
        self._advance()

    parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None
    if parser:
        actions = ensure_list(parser(self))

        # Only build a structured AlterTable when every token was consumed.
        if not self._curr:
            return self.expression(
                exp.AlterTable,
                this=this,
                exists=exists,
                actions=actions,
                only=only,
            )

    return self._parse_as_command(start)

def _parse_merge(self) -> exp.Merge:
    """Parse MERGE INTO target [AS alias] USING source ON condition WHEN ..."""
    self._match(TokenType.INTO)
    target = self._parse_table()

    if target and self._match(TokenType.ALIAS, advance=False):
        target.set("alias", self._parse_table_alias())

    self._match(TokenType.USING)
    using = self._parse_table()

    self._match(TokenType.ON)
    on = self._parse_conjunction()

    return self.expression(
        exp.Merge,
        this=target,
        using=using,
        on=on,
        expressions=self._parse_when_matched(),
    )

def _parse_when_matched(self) -> t.List[exp.When]:
    """Parse the WHEN [NOT] MATCHED [BY TARGET|SOURCE] [AND cond] THEN ... clauses."""
    whens = []

    while self._match(TokenType.WHEN):
        matched = not self._match(TokenType.NOT)
        self._match_text_seq("MATCHED")
        # source is False for BY TARGET, True for BY SOURCE, False when absent.
        source = (
            False
            if self._match_text_seq("BY", "TARGET")
            else self._match_text_seq("BY", "SOURCE")
        )
        condition = self._parse_conjunction() if self._match(TokenType.AND) else None

        self._match(TokenType.THEN)

        if self._match(TokenType.INSERT):
            _this = self._parse_star()
            if _this:
                then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=_this)
            else:
                then = self.expression(
                    exp.Insert,
                    this=self._parse_value(),
                    expression=self._match(TokenType.VALUES) and self._parse_value(),
                )
        elif self._match(TokenType.UPDATE):
            expressions = self._parse_star()
            if expressions:
                then = self.expression(exp.Update, expressions=expressions)
            else:
                then = self.expression(
                    exp.Update,
                    expressions=self._match(TokenType.SET)
                    and self._parse_csv(self._parse_equality),
                )
        elif self._match(TokenType.DELETE):
            then = self.expression(exp.Var, this=self._prev.text)
        else:
            then = None

        whens.append(
            self.expression(
                exp.When,
                matched=matched,
                source=source,
                condition=condition,
                then=then,
            )
        )
    return whens

def _parse_show(self) -> t.Optional[exp.Expression]:
    """Parse SHOW via SHOW_PARSERS; unknown variants become a raw Command."""
    parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE)
    if parser:
        return parser(self)
    return self._parse_as_command(self._prev)

def _parse_set_item_assignment(
    self, kind: t.Optional[str] = None
) -> t.Optional[exp.Expression]:
    """Parse a SET assignment (name = value / name TO value), or SET TRANSACTION."""
    index = self._index

    if kind in {"GLOBAL", "SESSION"} and self._match_text_seq("TRANSACTION"):
        return self._parse_set_transaction(global_=kind == "GLOBAL")

    left = self._parse_primary() or self._parse_id_var()
    assignment_delimiter = self._match_texts(("=", "TO"))

    if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter):
        self._retreat(index)
        return None

    right = self._parse_statement() or self._parse_id_var()
    this = self.expression(exp.EQ, this=left, expression=right)

    return self.expression(exp.SetItem, this=this, kind=kind)
def _parse_set_transaction(self, global_: bool = False) -> exp.Expression:
    """Parse SET [GLOBAL|SESSION] TRANSACTION <characteristic, ...>."""
    self._match_text_seq("TRANSACTION")
    characteristics = self._parse_csv(
        lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS)
    )
    return self.expression(
        exp.SetItem,
        expressions=characteristics,
        kind="TRANSACTION",
        **{"global": global_},  # type: ignore
    )

def _parse_set_item(self) -> t.Optional[exp.Expression]:
    """Parse one SET item via SET_PARSERS, defaulting to a plain assignment."""
    parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE)
    return parser(self) if parser else self._parse_set_item_assignment(kind=None)

def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command:
    """Parse SET <items>; if tokens remain, rewind and reparse as a raw Command."""
    index = self._index
    set_ = self.expression(
        exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag
    )

    if self._curr:
        self._retreat(index)
        return self._parse_as_command(self._prev)

    return set_

def _parse_var_from_options(self, options: t.Collection[str]) -> t.Optional[exp.Var]:
    """Match one of `options` (each possibly multi-word) and return it as a Var."""
    for option in options:
        if self._match_text_seq(*option.split(" ")):
            return exp.var(option)
    return None

def _parse_as_command(self, start: Token) -> exp.Command:
    """Consume all remaining tokens and wrap the raw SQL from `start` in a Command."""
    while self._curr:
        self._advance()
    text = self._find_sql(start, self._prev)
    # Command's `this` holds the leading keyword, `expression` the rest of the SQL.
    size = len(start.text)
    return exp.Command(this=text[:size], expression=text[size:])

def _parse_dict_property(self, this: str) -> exp.DictProperty:
    """Parse a dictionary property of the form KIND(name(key value, ...))."""
    settings = []

    self._match_l_paren()
    kind = self._parse_id_var()

    if self._match(TokenType.L_PAREN):
        while True:
            key = self._parse_id_var()
            value = self._parse_primary()

            # Stop once neither a key nor a value can be parsed.
            if not key and value is None:
                break
            settings.append(self.expression(exp.DictSubProperty, this=key, value=value))
        self._match(TokenType.R_PAREN)

    self._match_r_paren()

    return self.expression(
        exp.DictProperty,
        this=this,
        kind=kind.this if kind else None,
        settings=settings,
    )

def _parse_dict_range(self, this: str) -> exp.DictRange:
    """Parse a range property such as LIFETIME(MIN x MAX y) or LIFETIME(x)."""
    self._match_l_paren()
    has_min = self._match_text_seq("MIN")
    # NOTE: the locals `min`/`max` shadow the builtins within this method.
    if has_min:
        min = self._parse_var() or self._parse_primary()
        self._match_text_seq("MAX")
        max = self._parse_var() or self._parse_primary()
    else:
        # A single value means "max only"; min defaults to 0.
        max = self._parse_var() or self._parse_primary()
        min = exp.Literal.number(0)
    self._match_r_paren()
    return self.expression(exp.DictRange, this=this, min=min, max=max)

def _parse_comprehension(self, this: exp.Expression) -> t.Optional[exp.Comprehension]:
    """Parse a comprehension tail: <this> FOR expr IN iterator [IF condition]."""
    index = self._index
    expression = self._parse_column()
    if not self._match(TokenType.IN):
        # Rewinds one token beyond the saved index — presumably to also give
        # back the FOR token that routed us here; TODO confirm against caller.
        self._retreat(index - 1)
        return None
    iterator = self._parse_column()
    condition = self._parse_conjunction() if self._match_text_seq("IF") else None
    return self.expression(
        exp.Comprehension,
        this=this,
        expression=expression,
        iterator=iterator,
        condition=condition,
    )

def _find_parser(
    self, parsers: t.Dict[str, t.Callable], trie: t.Dict
) -> t.Optional[t.Callable]:
    """Find the parser keyed by the keyword sequence at the cursor.

    Walks `trie` token by token; restores the cursor when no full key matches.
    """
    if not self._curr:
        return None

    index = self._index
    this = []
    while True:
        # The current token might be multiple words
        curr = self._curr.text.upper()
        key = curr.split(" ")
        this.append(curr)

        self._advance()
        result, trie = in_trie(trie, key)
        if result == TrieResult.FAILED:
            break

        if result == TrieResult.EXISTS:
            subparser = parsers[" ".join(this)]
            return subparser

    self._retreat(index)
    return None

def _match(self, token_type, advance=True, expression=None):
    """Consume the current token if it has `token_type`; returns True or None."""
    if not self._curr:
        return None

    if self._curr.token_type == token_type:
        if advance:
            self._advance()
        self._add_comments(expression)
        return True

    return None

def _match_set(self, types, advance=True):
    """Consume the current token if its type is in `types`; returns True or None."""
    if not self._curr:
        return None

    if self._curr.token_type in types:
        if advance:
            self._advance()
        return True

    return None

def _match_pair(self, token_type_a, token_type_b, advance=True):
    """Consume the next two tokens when they match the given pair of types."""
    if not self._curr or not self._next:
        return None

    if self._curr.token_type == token_type_a and self._next.token_type == token_type_b:
        if advance:
            self._advance(2)
        return True

    return None

def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None:
    """Require and consume an opening paren, raising a parse error otherwise."""
    if not self._match(TokenType.L_PAREN, expression=expression):
        self.raise_error("Expecting (")

def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None:
    """Require and consume a closing paren, raising a parse error otherwise."""
    if not self._match(TokenType.R_PAREN, expression=expression):
        self.raise_error("Expecting )")

def _match_texts(self, texts, advance=True):
    """Consume the current token if its upper-cased text is in `texts`."""
    if self._curr and self._curr.text.upper() in texts:
        if advance:
            self._advance()
        return True
    return False

def _match_text_seq(self, *texts, advance=True):
    """Match a sequence of tokens by upper-cased text, rewinding fully on failure.

    With advance=False this acts as pure lookahead.
    """
    index = self._index
    for text in texts:
        if self._curr and self._curr.text.upper() == text:
            self._advance()
        else:
            self._retreat(index)
            return False

    if not advance:
        self._retreat(index)

    return True
@t.overload
def _replace_columns_with_dots(self, this: exp.Expression) -> exp.Expression:
    ...

@t.overload
def _replace_columns_with_dots(
    self, this: t.Optional[exp.Expression]
) -> t.Optional[exp.Expression]:
    ...

def _replace_columns_with_dots(self, this):
    """Recursively rewrite table-qualified Column nodes into Dot expressions."""
    if isinstance(this, (exp.Dot, exp.Column)):
        exp.replace_children(this, self._replace_columns_with_dots)

    if isinstance(this, exp.Column):
        table = this.args.get("table")
        if table:
            this = self.expression(exp.Dot, this=table, expression=this.this)
        else:
            this = this.this

    return this

def _replace_lambda(
    self, node: t.Optional[exp.Expression], lambda_variables: t.Set[str]
) -> t.Optional[exp.Expression]:
    """Replace Column references to lambda parameters with bare identifiers/dots."""
    if not node:
        return node

    for column in node.find_all(exp.Column):
        if column.parts[0].name not in lambda_variables:
            continue

        dot_or_id = column.to_dot() if column.table else column.this
        parent = column.parent

        if not isinstance(parent, exp.Dot):
            # Plain column reference: swap the node itself when it is the root.
            if column is node:
                node = dot_or_id
            else:
                column.replace(dot_or_id)
            continue

        # Walk up to the outermost Dot and replace the whole chain at once.
        while isinstance(parent.parent, exp.Dot):
            parent = parent.parent
        parent.replace(dot_or_id)

    return node

def _ensure_string_if_null(self, values: t.List[exp.Expression]) -> t.List[exp.Expression]:
    """Wrap each truthy value in COALESCE(CAST(v AS text), '')."""
    coalesced = []
    for value in values:
        if value:
            coalesced.append(
                exp.func("COALESCE", exp.cast(value, "text"), exp.Literal.string(""))
            )
    return coalesced
def parse_var_map(args: t.List) -> exp.StarMap | exp.VarMap:
    """Build a map expression from alternating key/value arguments.

    A single star argument yields a StarMap; otherwise the arguments are
    consumed pairwise as (key, value). An odd number of arguments raises
    IndexError when the final dangling key has no value.
    """
    if len(args) == 1 and args[0].is_star:
        return exp.StarMap(this=args[0])

    keys = []
    values = []
    position = 0
    total = len(args)
    while position < total:
        keys.append(args[position])
        values.append(args[position + 1])
        position += 2

    return exp.VarMap(
        keys=exp.Array(expressions=keys),
        values=exp.Array(expressions=values),
    )
60class Parser(metaclass=_Parser): 61 """ 62 Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree. 63 64 Args: 65 error_level: The desired error level. 66 Default: ErrorLevel.IMMEDIATE 67 error_message_context: Determines the amount of context to capture from a 68 query string when displaying the error message (in number of characters). 69 Default: 100 70 max_errors: Maximum number of error messages to include in a raised ParseError. 71 This is only relevant if error_level is ErrorLevel.RAISE. 72 Default: 3 73 """ 74 75 FUNCTIONS: t.Dict[str, t.Callable] = { 76 **{name: f.from_arg_list for f in exp.ALL_FUNCTIONS for name in f.sql_names()}, 77 "DATE_TO_DATE_STR": lambda args: exp.Cast( 78 this=seq_get(args, 0), 79 to=exp.DataType(this=exp.DataType.Type.TEXT), 80 ), 81 "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)), 82 "LIKE": parse_like, 83 "TIME_TO_TIME_STR": lambda args: exp.Cast( 84 this=seq_get(args, 0), 85 to=exp.DataType(this=exp.DataType.Type.TEXT), 86 ), 87 "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring( 88 this=exp.Cast( 89 this=seq_get(args, 0), 90 to=exp.DataType(this=exp.DataType.Type.TEXT), 91 ), 92 start=exp.Literal.number(1), 93 length=exp.Literal.number(10), 94 ), 95 "VAR_MAP": parse_var_map, 96 } 97 98 NO_PAREN_FUNCTIONS = { 99 TokenType.CURRENT_DATE: exp.CurrentDate, 100 TokenType.CURRENT_DATETIME: exp.CurrentDate, 101 TokenType.CURRENT_TIME: exp.CurrentTime, 102 TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp, 103 TokenType.CURRENT_USER: exp.CurrentUser, 104 } 105 106 STRUCT_TYPE_TOKENS = { 107 TokenType.NESTED, 108 TokenType.STRUCT, 109 } 110 111 NESTED_TYPE_TOKENS = { 112 TokenType.ARRAY, 113 TokenType.LOWCARDINALITY, 114 TokenType.MAP, 115 TokenType.NULLABLE, 116 *STRUCT_TYPE_TOKENS, 117 } 118 119 ENUM_TYPE_TOKENS = { 120 TokenType.ENUM, 121 TokenType.ENUM8, 122 TokenType.ENUM16, 123 } 124 125 TYPE_TOKENS = { 126 TokenType.BIT, 127 TokenType.BOOLEAN, 128 
TokenType.TINYINT, 129 TokenType.UTINYINT, 130 TokenType.SMALLINT, 131 TokenType.USMALLINT, 132 TokenType.INT, 133 TokenType.UINT, 134 TokenType.BIGINT, 135 TokenType.UBIGINT, 136 TokenType.INT128, 137 TokenType.UINT128, 138 TokenType.INT256, 139 TokenType.UINT256, 140 TokenType.MEDIUMINT, 141 TokenType.UMEDIUMINT, 142 TokenType.FIXEDSTRING, 143 TokenType.FLOAT, 144 TokenType.DOUBLE, 145 TokenType.CHAR, 146 TokenType.NCHAR, 147 TokenType.VARCHAR, 148 TokenType.NVARCHAR, 149 TokenType.TEXT, 150 TokenType.MEDIUMTEXT, 151 TokenType.LONGTEXT, 152 TokenType.MEDIUMBLOB, 153 TokenType.LONGBLOB, 154 TokenType.BINARY, 155 TokenType.VARBINARY, 156 TokenType.JSON, 157 TokenType.JSONB, 158 TokenType.INTERVAL, 159 TokenType.TINYBLOB, 160 TokenType.TINYTEXT, 161 TokenType.TIME, 162 TokenType.TIMETZ, 163 TokenType.TIMESTAMP, 164 TokenType.TIMESTAMP_S, 165 TokenType.TIMESTAMP_MS, 166 TokenType.TIMESTAMP_NS, 167 TokenType.TIMESTAMPTZ, 168 TokenType.TIMESTAMPLTZ, 169 TokenType.DATETIME, 170 TokenType.DATETIME64, 171 TokenType.DATE, 172 TokenType.INT4RANGE, 173 TokenType.INT4MULTIRANGE, 174 TokenType.INT8RANGE, 175 TokenType.INT8MULTIRANGE, 176 TokenType.NUMRANGE, 177 TokenType.NUMMULTIRANGE, 178 TokenType.TSRANGE, 179 TokenType.TSMULTIRANGE, 180 TokenType.TSTZRANGE, 181 TokenType.TSTZMULTIRANGE, 182 TokenType.DATERANGE, 183 TokenType.DATEMULTIRANGE, 184 TokenType.DECIMAL, 185 TokenType.UDECIMAL, 186 TokenType.BIGDECIMAL, 187 TokenType.UUID, 188 TokenType.GEOGRAPHY, 189 TokenType.GEOMETRY, 190 TokenType.HLLSKETCH, 191 TokenType.HSTORE, 192 TokenType.PSEUDO_TYPE, 193 TokenType.SUPER, 194 TokenType.SERIAL, 195 TokenType.SMALLSERIAL, 196 TokenType.BIGSERIAL, 197 TokenType.XML, 198 TokenType.YEAR, 199 TokenType.UNIQUEIDENTIFIER, 200 TokenType.USERDEFINED, 201 TokenType.MONEY, 202 TokenType.SMALLMONEY, 203 TokenType.ROWVERSION, 204 TokenType.IMAGE, 205 TokenType.VARIANT, 206 TokenType.OBJECT, 207 TokenType.OBJECT_IDENTIFIER, 208 TokenType.INET, 209 TokenType.IPADDRESS, 210 
TokenType.IPPREFIX, 211 TokenType.UNKNOWN, 212 TokenType.NULL, 213 *ENUM_TYPE_TOKENS, 214 *NESTED_TYPE_TOKENS, 215 } 216 217 SIGNED_TO_UNSIGNED_TYPE_TOKEN = { 218 TokenType.BIGINT: TokenType.UBIGINT, 219 TokenType.INT: TokenType.UINT, 220 TokenType.MEDIUMINT: TokenType.UMEDIUMINT, 221 TokenType.SMALLINT: TokenType.USMALLINT, 222 TokenType.TINYINT: TokenType.UTINYINT, 223 TokenType.DECIMAL: TokenType.UDECIMAL, 224 } 225 226 SUBQUERY_PREDICATES = { 227 TokenType.ANY: exp.Any, 228 TokenType.ALL: exp.All, 229 TokenType.EXISTS: exp.Exists, 230 TokenType.SOME: exp.Any, 231 } 232 233 RESERVED_KEYWORDS = { 234 *Tokenizer.SINGLE_TOKENS.values(), 235 TokenType.SELECT, 236 } 237 238 DB_CREATABLES = { 239 TokenType.DATABASE, 240 TokenType.SCHEMA, 241 TokenType.TABLE, 242 TokenType.VIEW, 243 TokenType.MODEL, 244 TokenType.DICTIONARY, 245 } 246 247 CREATABLES = { 248 TokenType.COLUMN, 249 TokenType.FUNCTION, 250 TokenType.INDEX, 251 TokenType.PROCEDURE, 252 *DB_CREATABLES, 253 } 254 255 # Tokens that can represent identifiers 256 ID_VAR_TOKENS = { 257 TokenType.VAR, 258 TokenType.ANTI, 259 TokenType.APPLY, 260 TokenType.ASC, 261 TokenType.AUTO_INCREMENT, 262 TokenType.BEGIN, 263 TokenType.CACHE, 264 TokenType.CASE, 265 TokenType.COLLATE, 266 TokenType.COMMAND, 267 TokenType.COMMENT, 268 TokenType.COMMIT, 269 TokenType.CONSTRAINT, 270 TokenType.DEFAULT, 271 TokenType.DELETE, 272 TokenType.DESC, 273 TokenType.DESCRIBE, 274 TokenType.DICTIONARY, 275 TokenType.DIV, 276 TokenType.END, 277 TokenType.EXECUTE, 278 TokenType.ESCAPE, 279 TokenType.FALSE, 280 TokenType.FIRST, 281 TokenType.FILTER, 282 TokenType.FORMAT, 283 TokenType.FULL, 284 TokenType.IS, 285 TokenType.ISNULL, 286 TokenType.INTERVAL, 287 TokenType.KEEP, 288 TokenType.KILL, 289 TokenType.LEFT, 290 TokenType.LOAD, 291 TokenType.MERGE, 292 TokenType.NATURAL, 293 TokenType.NEXT, 294 TokenType.OFFSET, 295 TokenType.ORDINALITY, 296 TokenType.OVERLAPS, 297 TokenType.OVERWRITE, 298 TokenType.PARTITION, 299 TokenType.PERCENT, 300 
TokenType.PIVOT, 301 TokenType.PRAGMA, 302 TokenType.RANGE, 303 TokenType.REFERENCES, 304 TokenType.RIGHT, 305 TokenType.ROW, 306 TokenType.ROWS, 307 TokenType.SEMI, 308 TokenType.SET, 309 TokenType.SETTINGS, 310 TokenType.SHOW, 311 TokenType.TEMPORARY, 312 TokenType.TOP, 313 TokenType.TRUE, 314 TokenType.UNIQUE, 315 TokenType.UNPIVOT, 316 TokenType.UPDATE, 317 TokenType.USE, 318 TokenType.VOLATILE, 319 TokenType.WINDOW, 320 *CREATABLES, 321 *SUBQUERY_PREDICATES, 322 *TYPE_TOKENS, 323 *NO_PAREN_FUNCTIONS, 324 } 325 326 INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END} 327 328 TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - { 329 TokenType.ANTI, 330 TokenType.APPLY, 331 TokenType.ASOF, 332 TokenType.FULL, 333 TokenType.LEFT, 334 TokenType.LOCK, 335 TokenType.NATURAL, 336 TokenType.OFFSET, 337 TokenType.RIGHT, 338 TokenType.SEMI, 339 TokenType.WINDOW, 340 } 341 342 COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS} 343 344 UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET} 345 346 TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"} 347 348 FUNC_TOKENS = { 349 TokenType.COLLATE, 350 TokenType.COMMAND, 351 TokenType.CURRENT_DATE, 352 TokenType.CURRENT_DATETIME, 353 TokenType.CURRENT_TIMESTAMP, 354 TokenType.CURRENT_TIME, 355 TokenType.CURRENT_USER, 356 TokenType.FILTER, 357 TokenType.FIRST, 358 TokenType.FORMAT, 359 TokenType.GLOB, 360 TokenType.IDENTIFIER, 361 TokenType.INDEX, 362 TokenType.ISNULL, 363 TokenType.ILIKE, 364 TokenType.INSERT, 365 TokenType.LIKE, 366 TokenType.MERGE, 367 TokenType.OFFSET, 368 TokenType.PRIMARY_KEY, 369 TokenType.RANGE, 370 TokenType.REPLACE, 371 TokenType.RLIKE, 372 TokenType.ROW, 373 TokenType.UNNEST, 374 TokenType.VAR, 375 TokenType.LEFT, 376 TokenType.RIGHT, 377 TokenType.DATE, 378 TokenType.DATETIME, 379 TokenType.TABLE, 380 TokenType.TIMESTAMP, 381 TokenType.TIMESTAMPTZ, 382 TokenType.WINDOW, 383 TokenType.XOR, 384 *TYPE_TOKENS, 385 *SUBQUERY_PREDICATES, 386 } 387 388 CONJUNCTION = { 389 TokenType.AND: exp.And, 390 TokenType.OR: 
exp.Or, 391 } 392 393 EQUALITY = { 394 TokenType.EQ: exp.EQ, 395 TokenType.NEQ: exp.NEQ, 396 TokenType.NULLSAFE_EQ: exp.NullSafeEQ, 397 } 398 399 COMPARISON = { 400 TokenType.GT: exp.GT, 401 TokenType.GTE: exp.GTE, 402 TokenType.LT: exp.LT, 403 TokenType.LTE: exp.LTE, 404 } 405 406 BITWISE = { 407 TokenType.AMP: exp.BitwiseAnd, 408 TokenType.CARET: exp.BitwiseXor, 409 TokenType.PIPE: exp.BitwiseOr, 410 TokenType.DPIPE: exp.DPipe, 411 } 412 413 TERM = { 414 TokenType.DASH: exp.Sub, 415 TokenType.PLUS: exp.Add, 416 TokenType.MOD: exp.Mod, 417 TokenType.COLLATE: exp.Collate, 418 } 419 420 FACTOR = { 421 TokenType.DIV: exp.IntDiv, 422 TokenType.LR_ARROW: exp.Distance, 423 TokenType.SLASH: exp.Div, 424 TokenType.STAR: exp.Mul, 425 } 426 427 EXPONENT: t.Dict[TokenType, t.Type[exp.Expression]] = {} 428 429 TIMES = { 430 TokenType.TIME, 431 TokenType.TIMETZ, 432 } 433 434 TIMESTAMPS = { 435 TokenType.TIMESTAMP, 436 TokenType.TIMESTAMPTZ, 437 TokenType.TIMESTAMPLTZ, 438 *TIMES, 439 } 440 441 SET_OPERATIONS = { 442 TokenType.UNION, 443 TokenType.INTERSECT, 444 TokenType.EXCEPT, 445 } 446 447 JOIN_METHODS = { 448 TokenType.NATURAL, 449 TokenType.ASOF, 450 } 451 452 JOIN_SIDES = { 453 TokenType.LEFT, 454 TokenType.RIGHT, 455 TokenType.FULL, 456 } 457 458 JOIN_KINDS = { 459 TokenType.INNER, 460 TokenType.OUTER, 461 TokenType.CROSS, 462 TokenType.SEMI, 463 TokenType.ANTI, 464 } 465 466 JOIN_HINTS: t.Set[str] = set() 467 468 LAMBDAS = { 469 TokenType.ARROW: lambda self, expressions: self.expression( 470 exp.Lambda, 471 this=self._replace_lambda( 472 self._parse_conjunction(), 473 {node.name for node in expressions}, 474 ), 475 expressions=expressions, 476 ), 477 TokenType.FARROW: lambda self, expressions: self.expression( 478 exp.Kwarg, 479 this=exp.var(expressions[0].name), 480 expression=self._parse_conjunction(), 481 ), 482 } 483 484 COLUMN_OPERATORS = { 485 TokenType.DOT: None, 486 TokenType.DCOLON: lambda self, this, to: self.expression( 487 exp.Cast if self.STRICT_CAST else 
exp.TryCast, 488 this=this, 489 to=to, 490 ), 491 TokenType.ARROW: lambda self, this, path: self.expression( 492 exp.JSONExtract, 493 this=this, 494 expression=path, 495 ), 496 TokenType.DARROW: lambda self, this, path: self.expression( 497 exp.JSONExtractScalar, 498 this=this, 499 expression=path, 500 ), 501 TokenType.HASH_ARROW: lambda self, this, path: self.expression( 502 exp.JSONBExtract, 503 this=this, 504 expression=path, 505 ), 506 TokenType.DHASH_ARROW: lambda self, this, path: self.expression( 507 exp.JSONBExtractScalar, 508 this=this, 509 expression=path, 510 ), 511 TokenType.PLACEHOLDER: lambda self, this, key: self.expression( 512 exp.JSONBContains, 513 this=this, 514 expression=key, 515 ), 516 } 517 518 EXPRESSION_PARSERS = { 519 exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 520 exp.Column: lambda self: self._parse_column(), 521 exp.Condition: lambda self: self._parse_conjunction(), 522 exp.DataType: lambda self: self._parse_types(allow_identifiers=False), 523 exp.Expression: lambda self: self._parse_statement(), 524 exp.From: lambda self: self._parse_from(), 525 exp.Group: lambda self: self._parse_group(), 526 exp.Having: lambda self: self._parse_having(), 527 exp.Identifier: lambda self: self._parse_id_var(), 528 exp.Join: lambda self: self._parse_join(), 529 exp.Lambda: lambda self: self._parse_lambda(), 530 exp.Lateral: lambda self: self._parse_lateral(), 531 exp.Limit: lambda self: self._parse_limit(), 532 exp.Offset: lambda self: self._parse_offset(), 533 exp.Order: lambda self: self._parse_order(), 534 exp.Ordered: lambda self: self._parse_ordered(), 535 exp.Properties: lambda self: self._parse_properties(), 536 exp.Qualify: lambda self: self._parse_qualify(), 537 exp.Returning: lambda self: self._parse_returning(), 538 exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY), 539 exp.Table: lambda self: self._parse_table_parts(), 540 exp.TableAlias: lambda self: self._parse_table_alias(), 541 
        exp.Where: lambda self: self._parse_where(),
        exp.Window: lambda self: self._parse_named_window(),
        exp.With: lambda self: self._parse_with(),
        "JOIN_TYPE": lambda self: self._parse_join_parts(),
    }

    # Maps a statement's leading token to the method that parses that statement
    # (dispatched from _parse_statement via self._prev.token_type).
    STATEMENT_PARSERS = {
        TokenType.ALTER: lambda self: self._parse_alter(),
        TokenType.BEGIN: lambda self: self._parse_transaction(),
        TokenType.CACHE: lambda self: self._parse_cache(),
        TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(),
        TokenType.COMMENT: lambda self: self._parse_comment(),
        TokenType.CREATE: lambda self: self._parse_create(),
        TokenType.DELETE: lambda self: self._parse_delete(),
        TokenType.DESC: lambda self: self._parse_describe(),
        TokenType.DESCRIBE: lambda self: self._parse_describe(),
        TokenType.DROP: lambda self: self._parse_drop(),
        TokenType.INSERT: lambda self: self._parse_insert(),
        TokenType.KILL: lambda self: self._parse_kill(),
        TokenType.LOAD: lambda self: self._parse_load(),
        TokenType.MERGE: lambda self: self._parse_merge(),
        TokenType.PIVOT: lambda self: self._parse_simplified_pivot(),
        TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()),
        TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(),
        TokenType.SET: lambda self: self._parse_set(),
        TokenType.UNCACHE: lambda self: self._parse_uncache(),
        TokenType.UPDATE: lambda self: self._parse_update(),
        TokenType.USE: lambda self: self.expression(
            exp.Use,
            kind=self._match_texts(("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA"))
            and exp.var(self._prev.text),
            this=self._parse_table(schema=False),
        ),
    }

    # Parsers for prefix (unary) operators, keyed by the operator token.
    UNARY_PARSERS = {
        TokenType.PLUS: lambda self: self._parse_unary(),  # Unary + is handled as a no-op
        TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()),
        TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()),
        TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()),
    }

    # Parsers for primary (leaf) expressions. Unlike most tables here, these
    # callables receive (self, token) — the already-consumed token.
    PRIMARY_PARSERS = {
        TokenType.STRING: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=True
        ),
        TokenType.NUMBER: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=False
        ),
        TokenType.STAR: lambda self, _: self.expression(
            exp.Star, **{"except": self._parse_except(), "replace": self._parse_replace()}
        ),
        TokenType.NULL: lambda self, _: self.expression(exp.Null),
        TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True),
        TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False),
        TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text),
        TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text),
        TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text),
        TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token),
        TokenType.NATIONAL_STRING: lambda self, token: self.expression(
            exp.National, this=token.text
        ),
        TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text),
        TokenType.HEREDOC_STRING: lambda self, token: self.expression(
            exp.RawString, this=token.text
        ),
        TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(),
    }

    # Parsers for bind-parameter / placeholder syntax (e.g. ?, @param, :name).
    PLACEHOLDER_PARSERS = {
        TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder),
        TokenType.PARAMETER: lambda self: self._parse_parameter(),
        TokenType.COLON: lambda self: self.expression(exp.Placeholder, this=self._prev.text)
        if self._match(TokenType.NUMBER) or self._match_set(self.ID_VAR_TOKENS)
        else None,
    }

    # Parsers for range/comparison-level binary constructs. These callables
    # receive (self, this) where `this` is the already-parsed left operand.
    RANGE_PARSERS = {
        TokenType.BETWEEN: lambda self, this: self._parse_between(this),
        TokenType.GLOB: binary_range_parser(exp.Glob),
        TokenType.ILIKE: binary_range_parser(exp.ILike),
        TokenType.IN: lambda self, this: self._parse_in(this),
        TokenType.IRLIKE: binary_range_parser(exp.RegexpILike),
        TokenType.IS: lambda self, this: self._parse_is(this),
        TokenType.LIKE: binary_range_parser(exp.Like),
        TokenType.OVERLAPS: binary_range_parser(exp.Overlaps),
        TokenType.RLIKE: binary_range_parser(exp.RegexpLike),
        TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo),
        TokenType.FOR: lambda self, this: self._parse_comprehension(this),
    }

    # DDL property parsers, keyed by the (upper-cased) keyword text; looked up
    # in _parse_property / _parse_property_before via self._prev.text.upper().
    PROPERTY_PARSERS: t.Dict[str, t.Callable] = {
        "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty),
        "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty),
        "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(),
        "CHARSET": lambda self, **kwargs: self._parse_character_set(**kwargs),
        "CHARACTER SET": lambda self, **kwargs: self._parse_character_set(**kwargs),
        "CHECKSUM": lambda self: self._parse_checksum(),
        "CLUSTER BY": lambda self: self._parse_cluster(),
        "CLUSTERED": lambda self: self._parse_clustered_by(),
        "COLLATE": lambda self, **kwargs: self._parse_property_assignment(
            exp.CollateProperty, **kwargs
        ),
        "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty),
        "COPY": lambda self: self._parse_copy_property(),
        "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs),
        "DEFINER": lambda self: self._parse_definer(),
        "DETERMINISTIC": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "DISTKEY": lambda self: self._parse_distkey(),
        "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty),
        "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty),
        "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty),
        "EXTERNAL": lambda self: self.expression(exp.ExternalProperty),
        "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs),
        "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "FREESPACE": lambda self: self._parse_freespace(),
        "HEAP": lambda self: self.expression(exp.HeapProperty),
        "IMMUTABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "INPUT": lambda self: self.expression(exp.InputModelProperty, this=self._parse_schema()),
        "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs),
        "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty),
        "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"),
        "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"),
        "LIKE": lambda self: self._parse_create_like(),
        "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty),
        "LOCK": lambda self: self._parse_locking(),
        "LOCKING": lambda self: self._parse_locking(),
        "LOG": lambda self, **kwargs: self._parse_log(**kwargs),
        "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty),
        "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs),
        "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True),
        "NO": lambda self: self._parse_no_property(),
        "ON": lambda self: self._parse_on_property(),
        "ORDER BY": lambda self: self._parse_order(skip_order_token=True),
        "OUTPUT": lambda self: self.expression(exp.OutputModelProperty, this=self._parse_schema()),
        "PARTITION": lambda self: self._parse_partitioned_of(),
        "PARTITION BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED_BY": lambda self: self._parse_partitioned_by(),
        "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True),
        "RANGE": lambda self: self._parse_dict_range(this="RANGE"),
        "REMOTE": lambda self: self._parse_remote_with_connection(),
        "RETURNS": lambda self: self._parse_returns(),
        "ROW": lambda self: self._parse_row(),
        "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty),
        "SAMPLE": lambda self: self.expression(
            exp.SampleProperty, this=self._match_text_seq("BY") and self._parse_bitwise()
        ),
        "SET": lambda self: self.expression(exp.SetProperty, multi=False),
        "SETTINGS": lambda self: self.expression(
            exp.SettingsProperty, expressions=self._parse_csv(self._parse_set_item)
        ),
        "SORTKEY": lambda self: self._parse_sortkey(),
        "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"),
        "STABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("STABLE")
        ),
        "STORED": lambda self: self._parse_stored(),
        "SYSTEM_VERSIONING": lambda self: self._parse_system_versioning_property(),
        "TBLPROPERTIES": lambda self: self._parse_wrapped_csv(self._parse_property),
        "TEMP": lambda self: self.expression(exp.TemporaryProperty),
        "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty),
        "TO": lambda self: self._parse_to_table(),
        "TRANSIENT": lambda self: self.expression(exp.TransientProperty),
        "TRANSFORM": lambda self: self.expression(
            exp.TransformModelProperty, expressions=self._parse_wrapped_csv(self._parse_expression)
        ),
        "TTL": lambda self: self._parse_ttl(),
        "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "VOLATILE": lambda self: self._parse_volatile_property(),
        "WITH": lambda self: self._parse_with_property(),
    }

    # Column-constraint parsers, keyed by the constraint keyword text.
    CONSTRAINT_PARSERS = {
        "AUTOINCREMENT": lambda self: self._parse_auto_increment(),
        "AUTO_INCREMENT": lambda self: self._parse_auto_increment(),
        "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False),
        "CHARACTER SET": lambda self: self.expression(
            exp.CharacterSetColumnConstraint, this=self._parse_var_or_string()
        ),
        "CHECK": lambda self: self.expression(
            exp.CheckColumnConstraint, this=self._parse_wrapped(self._parse_conjunction)
        ),
        "COLLATE": lambda self: self.expression(
            exp.CollateColumnConstraint, this=self._parse_var()
        ),
        "COMMENT": lambda self: self.expression(
            exp.CommentColumnConstraint, this=self._parse_string()
        ),
        "COMPRESS": lambda self: self._parse_compress(),
        "CLUSTERED": lambda self: self.expression(
            exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered)
        ),
        "NONCLUSTERED": lambda self: self.expression(
            exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered)
        ),
        "DEFAULT": lambda self: self.expression(
            exp.DefaultColumnConstraint, this=self._parse_bitwise()
        ),
        "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()),
        "FOREIGN KEY": lambda self: self._parse_foreign_key(),
        "FORMAT": lambda self: self.expression(
            exp.DateFormatColumnConstraint, this=self._parse_var_or_string()
        ),
        "GENERATED": lambda self: self._parse_generated_as_identity(),
        "IDENTITY": lambda self: self._parse_auto_increment(),
        "INLINE": lambda self: self._parse_inline(),
        "LIKE": lambda self: self._parse_create_like(),
        "NOT": lambda self: self._parse_not_constraint(),
        "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True),
        "ON": lambda self: (
            self._match(TokenType.UPDATE)
            and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function())
        )
        or self.expression(exp.OnProperty, this=self._parse_id_var()),
        "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()),
        "PERIOD": lambda self: self._parse_period_for_system_time(),
        "PRIMARY KEY": lambda self: self._parse_primary_key(),
        "REFERENCES": lambda self: self._parse_references(match=False),
        "TITLE": lambda self: self.expression(
            exp.TitleColumnConstraint, this=self._parse_var_or_string()
        ),
        "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]),
        "UNIQUE": lambda self: self._parse_unique(),
        "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint),
        "WITH": lambda self: self.expression(
            exp.Properties, expressions=self._parse_wrapped_csv(self._parse_property)
        ),
    }

    # ALTER TABLE action parsers, keyed by the action keyword (ADD, DROP, ...).
    ALTER_PARSERS = {
        "ADD": lambda self: self._parse_alter_table_add(),
        "ALTER": lambda self: self._parse_alter_table_alter(),
        "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()),
        "DROP": lambda self: self._parse_alter_table_drop(),
        "RENAME": lambda self: self._parse_alter_table_rename(),
    }

    SCHEMA_UNNAMED_CONSTRAINTS = {"CHECK", "FOREIGN KEY", "LIKE", "PRIMARY KEY", "UNIQUE", "PERIOD"}

    # Function-like constructs parsed without a parenthesized argument list.
    NO_PAREN_FUNCTION_PARSERS = {
        "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()),
        "CASE": lambda self: self._parse_case(),
        "IF": lambda self: self._parse_if(),
        "NEXT": lambda self: self._parse_next_value_for(),
    }

    INVALID_FUNC_NAME_TOKENS = {
        TokenType.IDENTIFIER,
        TokenType.STRING,
    }

    FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"}

    # Function names whose argument lists need a dedicated parse method.
    FUNCTION_PARSERS = {
        "ANY_VALUE": lambda self: self._parse_any_value(),
        "CAST": lambda self: self._parse_cast(self.STRICT_CAST),
        "CONCAT": lambda self: self._parse_concat(),
        "CONCAT_WS": lambda self: self._parse_concat_ws(),
        "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST),
        "DECODE": lambda self: self._parse_decode(),
        "EXTRACT": lambda self: self._parse_extract(),
        "JSON_OBJECT": lambda self: self._parse_json_object(),
        "JSON_TABLE": lambda self: self._parse_json_table(),
        "LOG": lambda self: self._parse_logarithm(),
        "MATCH": lambda self: self._parse_match_against(),
        "OPENJSON": lambda self: self._parse_open_json(),
        "POSITION": lambda self: self._parse_position(),
        "PREDICT": lambda self: self._parse_predict(),
        "SAFE_CAST": lambda self: self._parse_cast(False, safe=True),
        "STRING_AGG": lambda self: self._parse_string_agg(),
        "SUBSTRING": lambda self: self._parse_substring(),
        "TRIM": lambda self: self._parse_trim(),
        "TRY_CAST": lambda self: self._parse_cast(False, safe=True),
        "TRY_CONVERT": lambda self: self._parse_convert(False, safe=True),
    }

    # Each entry parses one query modifier and returns an (arg_name, node)
    # pair; note FETCH shares the "limit" slot with LIMIT.
    QUERY_MODIFIER_PARSERS = {
        TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()),
        TokenType.WHERE: lambda self: ("where", self._parse_where()),
        TokenType.GROUP_BY: lambda self: ("group", self._parse_group()),
        TokenType.HAVING: lambda self: ("having", self._parse_having()),
        TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()),
        TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()),
        TokenType.ORDER_BY: lambda self: ("order", self._parse_order()),
        TokenType.LIMIT: lambda self: ("limit", self._parse_limit()),
        TokenType.FETCH: lambda self: ("limit", self._parse_limit()),
        TokenType.OFFSET: lambda self: ("offset", self._parse_offset()),
        TokenType.FOR: lambda self: ("locks", self._parse_locks()),
        TokenType.LOCK: lambda self: ("locks", self._parse_locks()),
        TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)),
        TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)),
        TokenType.CLUSTER_BY: lambda self: (
            "cluster",
            self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
        ),
        TokenType.DISTRIBUTE_BY: lambda self: (
            "distribute",
            self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY),
        ),
        TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)),
        TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)),
        TokenType.START_WITH: lambda self: ("connect", self._parse_connect()),
    }

    # SET <scope> ... parsers; the _Parser metaclass builds SET_TRIE from the keys.
    SET_PARSERS = {
        "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"),
        "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"),
        "SESSION": lambda self: self._parse_set_item_assignment("SESSION"),
        "TRANSACTION": lambda self: self._parse_set_transaction(),
    }

    # Empty by default; dialects add entries. SHOW_TRIE is built from the keys
    # by the _Parser metaclass.
    SHOW_PARSERS: t.Dict[str, t.Callable] = {}

    # These callables receive (self, this, data_type) for casts of literals.
    TYPE_LITERAL_PARSERS = {
        exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this),
    }

    MODIFIABLES = (exp.Subquery, exp.Subqueryable, exp.Table)

    # Tokens that can begin the SELECT part of a CREATE ... AS statement.
    DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN}

    # Tokens that may directly precede VOLATILE when it denotes a table
    # property rather than function stability (see _parse_volatile_property).
    PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE}

    TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"}
    TRANSACTION_CHARACTERISTICS = {
        "ISOLATION LEVEL REPEATABLE READ",
        "ISOLATION LEVEL READ COMMITTED",
        "ISOLATION LEVEL READ UNCOMMITTED",
        "ISOLATION LEVEL SERIALIZABLE",
        "READ WRITE",
        "READ ONLY",
    }

    # INSERT OR <alternative> conflict-resolution keywords.
    INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"}

    CLONE_KEYWORDS = {"CLONE", "COPY"}
    CLONE_KINDS = {"TIMESTAMP", "OFFSET", "STATEMENT"}

    OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS"}
    OPTYPE_FOLLOW_TOKENS = {TokenType.COMMA, TokenType.R_PAREN}

    TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE}

    # Identifier tokens valid as a window alias (ROWS would be ambiguous).
    WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS}
    WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER}
    WINDOW_SIDES = {"FOLLOWING", "PRECEDING"}

    # Identifier tokens valid inside a FETCH clause (ROW/ROWS/PERCENT are
    # clause keywords there, so they are excluded).
    FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT}

    ADD_CONSTRAINT_TOKENS = {TokenType.CONSTRAINT, TokenType.PRIMARY_KEY, TokenType.FOREIGN_KEY}

    DISTINCT_TOKENS = {TokenType.DISTINCT}

    NULL_TOKENS = {TokenType.NULL}

    # Identifier tokens valid as the OFFSET alias of an UNNEST clause; set
    # operation keywords are excluded to avoid ambiguity.
    UNNEST_OFFSET_ALIAS_TOKENS = ID_VAR_TOKENS - SET_OPERATIONS
    STRICT_CAST = True

    # A NULL arg in CONCAT yields NULL by default
    CONCAT_NULL_OUTPUTS_STRING = False

    PREFIXED_PIVOT_COLUMNS = False
    IDENTIFY_PIVOT_STRINGS = False

    LOG_BASE_FIRST = True
    LOG_DEFAULTS_TO_LN = False

    # Whether or not ADD is present for each column added by ALTER TABLE
    ALTER_TABLE_ADD_COLUMN_KEYWORD = True

    # Whether or not the table sample clause expects CSV syntax
    TABLESAMPLE_CSV = False

    # Whether or not the SET command needs a delimiter (e.g. "=") for assignments
    SET_REQUIRES_ASSIGNMENT_DELIMITER = True

    # Whether the TRIM function expects the characters to trim as its first argument
    TRIM_PATTERN_FIRST = False

    __slots__ = (
        "error_level",
        "error_message_context",
        "max_errors",
        "sql",
        "errors",
        "_tokens",
        "_index",
        "_curr",
        "_next",
        "_prev",
        "_prev_comments",
        "_tokenizer",
    )

    # Autofilled
    TOKENIZER_CLASS: t.Type[Tokenizer] = Tokenizer
    INDEX_OFFSET: int = 0
    UNNEST_COLUMN_ONLY: bool = False
    ALIAS_POST_TABLESAMPLE: bool = False
    STRICT_STRING_CONCAT = False
    SUPPORTS_USER_DEFINED_TYPES = True
    NORMALIZE_FUNCTIONS = "upper"
    NULL_ORDERING: str = "nulls_are_small"
    SHOW_TRIE: t.Dict = {}
    SET_TRIE: t.Dict = {}
    FORMAT_MAPPING: t.Dict[str, str] = {}
    FORMAT_TRIE: t.Dict = {}
    TIME_MAPPING: t.Dict[str, str] = {}
    TIME_TRIE: t.Dict = {}

    def __init__(
        self,
        error_level: t.Optional[ErrorLevel] = None,
        error_message_context: int = 100,
        max_errors: int = 3,
    ):
        self.error_level = error_level or ErrorLevel.IMMEDIATE
        self.error_message_context = error_message_context
        self.max_errors = max_errors
        self._tokenizer = self.TOKENIZER_CLASS()
        self.reset()

    def reset(self):
        """Clears all parsing state so this instance can be reused for a new parse."""
        self.sql = ""
        self.errors = []
        self._tokens = []
        self._index = 0
        self._curr = None
        self._next = None
        self._prev = None
        self._prev_comments = None

    def parse(
        self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens and returns a list of syntax trees, one tree
        per parsed SQL statement.

        Args:
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The list of the produced syntax trees.
        """
        return self._parse(
            parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
        )

    def parse_into(
        self,
        expression_types: exp.IntoType,
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens into a given Expression type. If a collection of Expression
        types is given instead, this method will try to parse the token list into each one
        of them, stopping at the first for which the parsing succeeds.

        Args:
            expression_types: The expression type(s) to try and parse the token list into.
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The target Expression.

        Raises:
            TypeError: If no parser is registered for a requested expression type.
            ParseError: If the token list could not be parsed into any of the types.
        """
        errors = []
        for expression_type in ensure_list(expression_types):
            parser = self.EXPRESSION_PARSERS.get(expression_type)
            if not parser:
                raise TypeError(f"No parser registered for {expression_type}")

            try:
                return self._parse(parser, raw_tokens, sql)
            except ParseError as e:
                e.errors[0]["into_expression"] = expression_type
                errors.append(e)

        raise ParseError(
            f"Failed to parse '{sql or raw_tokens}' into {expression_types}",
            errors=merge_errors(errors),
        ) from errors[-1]

    def _parse(
        self,
        parse_method: t.Callable[[Parser], t.Optional[exp.Expression]],
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        """Splits `raw_tokens` on semicolons and applies `parse_method` once per chunk."""
        self.reset()
        self.sql = sql or ""

        total = len(raw_tokens)
        chunks: t.List[t.List[Token]] = [[]]

        # Split the token stream into one chunk per statement; semicolon
        # tokens themselves are dropped, and a trailing semicolon does not
        # open an empty final chunk.
        for i, token in enumerate(raw_tokens):
            if token.token_type == TokenType.SEMICOLON:
                if i < total - 1:
                    chunks.append([])
            else:
                chunks[-1].append(token)

        expressions = []

        for tokens in chunks:
            # Start at -1 so the first _advance() lands on index 0.
            self._index = -1
            self._tokens = tokens
            self._advance()

            expressions.append(parse_method(self))

            # Leftover tokens mean the chunk was not fully consumed.
            if self._index < len(self._tokens):
                self.raise_error("Invalid expression / Unexpected token")

        self.check_errors()

        return expressions

    def check_errors(self) -> None:
        """Logs or raises any found errors, depending on the chosen error level setting."""
        if self.error_level == ErrorLevel.WARN:
            for error in self.errors:
                logger.error(str(error))
        elif self.error_level == ErrorLevel.RAISE and self.errors:
            raise ParseError(
                concat_messages(self.errors, self.max_errors),
                errors=merge_errors(self.errors),
            )

    def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
        """
        Appends an error in the list of recorded errors or raises it, depending on the chosen
        error level setting.
        """
        token = token or self._curr or self._prev or Token.string("")
        start = token.start
        end = token.end + 1
        # Slice a window of surrounding SQL and underline (ANSI escapes) the
        # offending span in the formatted message.
        start_context = self.sql[max(start - self.error_message_context, 0) : start]
        highlight = self.sql[start:end]
        end_context = self.sql[end : end + self.error_message_context]

        error = ParseError.new(
            f"{message}. Line {token.line}, Col: {token.col}.\n"
            f"  {start_context}\033[4m{highlight}\033[0m{end_context}",
            description=message,
            line=token.line,
            col=token.col,
            start_context=start_context,
            highlight=highlight,
            end_context=end_context,
        )

        if self.error_level == ErrorLevel.IMMEDIATE:
            raise error

        self.errors.append(error)

    def expression(
        self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs
    ) -> E:
        """
        Creates a new, validated Expression.

        Args:
            exp_class: The expression class to instantiate.
            comments: An optional list of comments to attach to the expression.
            kwargs: The arguments to set for the expression along with their respective values.

        Returns:
            The target expression.
        """
        instance = exp_class(**kwargs)
        # If no comments are given explicitly, attach any comments pending
        # from the previously consumed token.
        instance.add_comments(comments) if comments else self._add_comments(instance)
        return self.validate_expression(instance)

    def _add_comments(self, expression: t.Optional[exp.Expression]) -> None:
        """Attaches the pending token comments to `expression` and clears them."""
        if expression and self._prev_comments:
            expression.add_comments(self._prev_comments)
            self._prev_comments = None

    def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E:
        """
        Validates an Expression, making sure that all its mandatory arguments are set.

        Args:
            expression: The expression to validate.
            args: An optional list of items that was used to instantiate the expression, if it's a Func.

        Returns:
            The validated expression.
        """
        if self.error_level != ErrorLevel.IGNORE:
            for error_message in expression.error_messages(args):
                self.raise_error(error_message)

        return expression

    def _find_sql(self, start: Token, end: Token) -> str:
        """Returns the slice of the original SQL spanned by the `start` and `end` tokens."""
        return self.sql[start.start : end.end + 1]

    def _advance(self, times: int = 1) -> None:
        """Moves the cursor `times` tokens ahead, refreshing _curr/_next/_prev state."""
        self._index += times
        self._curr = seq_get(self._tokens, self._index)
        self._next = seq_get(self._tokens, self._index + 1)

        if self._index > 0:
            self._prev = self._tokens[self._index - 1]
            self._prev_comments = self._prev.comments
        else:
            self._prev = None
            self._prev_comments = None

    def _retreat(self, index: int) -> None:
        """Moves the cursor back (or forward) to the given absolute token index."""
        if index != self._index:
            self._advance(index - self._index)

    def _parse_command(self) -> exp.Command:
        """Wraps the previous token's text plus an optional string into a generic Command node."""
        return self.expression(exp.Command, this=self._prev.text, expression=self._parse_string())

    def _parse_comment(self, allow_exists: bool = True) -> exp.Expression:
        """Parses COMMENT [IF EXISTS] ON <kind> <object> IS <string>."""
        start = self._prev
        exists = self._parse_exists() if allow_exists else None

        self._match(TokenType.ON)

        kind = self._match_set(self.CREATABLES) and self._prev
        if not kind:
            # Unknown target kind: fall back to an opaque Command.
            return self._parse_as_command(start)

        if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=kind.token_type)
        elif kind.token_type == TokenType.TABLE:
            this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS)
        elif kind.token_type == TokenType.COLUMN:
            this = self._parse_column()
        else:
            this = self._parse_id_var()

        self._match(TokenType.IS)

        return self.expression(
            exp.Comment, this=this, kind=kind.text, expression=self._parse_string(), exists=exists
        )

    def _parse_to_table(
        self,
    ) -> exp.ToTableProperty:
        """Parses the TO <table> property (table name may include a schema)."""
        table = self._parse_table_parts(schema=True)
        return self.expression(exp.ToTableProperty, this=table)

    # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl
    def _parse_ttl(self) -> exp.Expression:
        """Parses a ClickHouse MergeTree TTL clause, including per-expression actions."""

        def _parse_ttl_action() -> t.Optional[exp.Expression]:
            # One TTL expression, optionally followed by an action
            # (DELETE / RECOMPRESS / TO DISK / TO VOLUME).
            this = self._parse_bitwise()

            if self._match_text_seq("DELETE"):
                return self.expression(exp.MergeTreeTTLAction, this=this, delete=True)
            if self._match_text_seq("RECOMPRESS"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise()
                )
            if self._match_text_seq("TO", "DISK"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string()
                )
            if self._match_text_seq("TO", "VOLUME"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string()
                )

            return this

        expressions = self._parse_csv(_parse_ttl_action)
        where = self._parse_where()
        group = self._parse_group()

        aggregates = None
        if group and self._match(TokenType.SET):
            aggregates = self._parse_csv(self._parse_set_item)

        return self.expression(
            exp.MergeTreeTTL,
            expressions=expressions,
            where=where,
            group=group,
            aggregates=aggregates,
        )

    def _parse_statement(self) -> t.Optional[exp.Expression]:
        """Parses a single statement, dispatching on the first token."""
        if self._curr is None:
            return None

        if self._match_set(self.STATEMENT_PARSERS):
            return self.STATEMENT_PARSERS[self._prev.token_type](self)

        if self._match_set(Tokenizer.COMMANDS):
            return self._parse_command()

        # No dedicated statement parser: treat the input as an expression or
        # a SELECT, then apply any trailing query modifiers.
        expression = self._parse_expression()
        expression = self._parse_set_operations(expression) if expression else self._parse_select()
        return self._parse_query_modifiers(expression)

    def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command:
        """Parses a DROP statement; falls back to a Command for unknown kinds."""
        start = self._prev
        temporary = self._match(TokenType.TEMPORARY)
        materialized = self._match_text_seq("MATERIALIZED")

        kind = self._match_set(self.CREATABLES) and self._prev.text
        if not kind:
            return self._parse_as_command(start)

        return self.expression(
            exp.Drop,
            comments=start.comments,
            exists=exists or self._parse_exists(),
            this=self._parse_table(schema=True),
            kind=kind,
            temporary=temporary,
            materialized=materialized,
            cascade=self._match_text_seq("CASCADE"),
            constraints=self._match_text_seq("CONSTRAINTS"),
            purge=self._match_text_seq("PURGE"),
        )

    def _parse_exists(self, not_: bool = False) -> t.Optional[bool]:
        """Tries to consume IF [NOT] EXISTS; truthy only if the full sequence matched."""
        return (
            self._match_text_seq("IF")
            and (not not_ or self._match(TokenType.NOT))
            and self._match(TokenType.EXISTS)
        )

    def _parse_create(self) -> exp.Create | exp.Command:
        """Parses a CREATE [OR REPLACE] statement for any creatable kind."""
        # Note: this can't be None because we've matched a statement parser
        start = self._prev
        comments = self._prev_comments

        replace = start.text.upper() == "REPLACE" or self._match_pair(
            TokenType.OR, TokenType.REPLACE
        )
        unique = self._match(TokenType.UNIQUE)

        # CREATE TABLE FUNCTION: skip the TABLE token, keep FUNCTION.
        if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False):
            self._advance()

        properties = None
        create_token = self._match_set(self.CREATABLES) and self._prev

        if not create_token:
            # exp.Properties.Location.POST_CREATE
            properties = self._parse_properties()
            create_token = self._match_set(self.CREATABLES) and self._prev

            if not properties or not create_token:
                return self._parse_as_command(start)

        exists = self._parse_exists(not_=True)
        this = None
        expression: t.Optional[exp.Expression] = None
        indexes = None
        no_schema_binding = None
        begin = None
        end = None
        clone = None

        def extend_props(temp_props: t.Optional[exp.Properties]) -> None:
            # Accumulates properties parsed at different clause positions
            # into a single Properties node.
            nonlocal properties
            if properties and temp_props:
                properties.expressions.extend(temp_props.expressions)
            elif temp_props:
                properties = temp_props

        if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=create_token.token_type)

            # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature)
            extend_props(self._parse_properties())

            self._match(TokenType.ALIAS)

            if self._match(TokenType.COMMAND):
                expression = self._parse_as_command(self._prev)
            else:
                begin = self._match(TokenType.BEGIN)
                return_ = self._match_text_seq("RETURN")

                if self._match(TokenType.STRING, advance=False):
                    # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property
                    # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement
                    expression = self._parse_string()
                    extend_props(self._parse_properties())
                else:
                    expression = self._parse_statement()

                end = self._match_text_seq("END")

                if return_:
                    expression = self.expression(exp.Return, this=expression)
        elif create_token.token_type == TokenType.INDEX:
            this = self._parse_index(index=self._parse_id_var())
        elif create_token.token_type in self.DB_CREATABLES:
            table_parts = self._parse_table_parts(schema=True)

            # exp.Properties.Location.POST_NAME
            self._match(TokenType.COMMA)
            extend_props(self._parse_properties(before=True))

            this = self._parse_schema(this=table_parts)

            # exp.Properties.Location.POST_SCHEMA and POST_WITH
            extend_props(self._parse_properties())

            self._match(TokenType.ALIAS)
            if not self._match_set(self.DDL_SELECT_TOKENS, advance=False):
                # exp.Properties.Location.POST_ALIAS
                extend_props(self._parse_properties())

            expression = self._parse_ddl_select()

            if create_token.token_type == TokenType.TABLE:
                # exp.Properties.Location.POST_EXPRESSION
                extend_props(self._parse_properties())

                indexes = []
                while True:
                    index = self._parse_index()

                    # exp.Properties.Location.POST_INDEX
                    extend_props(self._parse_properties())

                    if not index:
                        break
                    else:
                        self._match(TokenType.COMMA)
                        indexes.append(index)
            elif create_token.token_type == TokenType.VIEW:
                if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"):
                    no_schema_binding = True

        shallow = self._match_text_seq("SHALLOW")

        # Optional CLONE/COPY clause, e.g. Snowflake-style
        # CLONE <table> [{AT | BEFORE} (<kind> => <expr>)].
        if self._match_texts(self.CLONE_KEYWORDS):
            copy = self._prev.text.lower() == "copy"
            clone = self._parse_table(schema=True)
            when = self._match_texts({"AT", "BEFORE"}) and self._prev.text.upper()
            clone_kind = (
                self._match(TokenType.L_PAREN)
                and self._match_texts(self.CLONE_KINDS)
                and self._prev.text.upper()
            )
            clone_expression = self._match(TokenType.FARROW) and self._parse_bitwise()
            self._match(TokenType.R_PAREN)
            clone = self.expression(
                exp.Clone,
                this=clone,
                when=when,
                kind=clone_kind,
                shallow=shallow,
                expression=clone_expression,
                copy=copy,
            )

        return self.expression(
            exp.Create,
            comments=comments,
            this=this,
            kind=create_token.text,
            replace=replace,
            unique=unique,
            expression=expression,
            exists=exists,
            properties=properties,
            indexes=indexes,
            no_schema_binding=no_schema_binding,
            begin=begin,
            end=end,
            clone=clone,
        )

    def _parse_property_before(self) -> t.Optional[exp.Expression]:
        """Parses a property that may be prefixed by modifier keywords (NO/DUAL/...)."""
        # only used for teradata currently
        self._match(TokenType.COMMA)

        kwargs = {
            "no": self._match_text_seq("NO"),
            "dual": self._match_text_seq("DUAL"),
            "before": self._match_text_seq("BEFORE"),
            "default": self._match_text_seq("DEFAULT"),
            "local": (self._match_text_seq("LOCAL") and "LOCAL")
            or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"),
            "after": self._match_text_seq("AFTER"),
            "minimum": self._match_texts(("MIN", "MINIMUM")),
            "maximum": self._match_texts(("MAX", "MAXIMUM")),
        }

        if self._match_texts(self.PROPERTY_PARSERS):
            parser = self.PROPERTY_PARSERS[self._prev.text.upper()]
            try:
                # Only forward the modifiers that actually matched; parsers
                # that don't accept them raise TypeError, reported as a
                # parse error.
                return parser(self, **{k: v for k, v in kwargs.items() if v})
            except TypeError:
                self.raise_error(f"Cannot parse property '{self._prev.text}'")

        return None

    def _parse_property(self) -> t.Optional[exp.Expression]:
        """Parses a single DDL property, trying keyword parsers before key = value form."""
        if self._match_texts(self.PROPERTY_PARSERS):
            return self.PROPERTY_PARSERS[self._prev.text.upper()](self)

        if self._match(TokenType.DEFAULT) and self._match_texts(self.PROPERTY_PARSERS):
            return self.PROPERTY_PARSERS[self._prev.text.upper()](self, default=True)

        if self._match_text_seq("COMPOUND", "SORTKEY"):
            return self._parse_sortkey(compound=True)

        if self._match_text_seq("SQL", "SECURITY"):
            return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER"))

        # Fallback: generic `key = value`; backtrack if there is no "=".
        index = self._index
        key = self._parse_column()

        if not self._match(TokenType.EQ):
            self._retreat(index)
            return None

        return self.expression(
            exp.Property,
            this=key.to_dot() if isinstance(key, exp.Column) else key,
            value=self._parse_column() or self._parse_var(any_token=True),
        )

    def _parse_stored(self) -> exp.FileFormatProperty:
        """Parses STORED [AS] <format>, including Hive INPUTFORMAT/OUTPUTFORMAT pairs."""
        self._match(TokenType.ALIAS)

        input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None
        output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None

        return self.expression(
            exp.FileFormatProperty,
            this=self.expression(
                exp.InputOutputFormat, input_format=input_format, output_format=output_format
            )
            if input_format or output_format
            else self._parse_var_or_string() or self._parse_number() or self._parse_id_var(),
        )

    def _parse_property_assignment(self, exp_class: t.Type[E], **kwargs: t.Any) -> E:
        """Parses `[= | AS] <field>` and wraps it in the given property class."""
        self._match(TokenType.EQ)
        self._match(TokenType.ALIAS)
        return self.expression(exp_class, this=self._parse_field(), **kwargs)

    def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]:
        """Parses consecutive properties into a Properties node, or None if there are none."""
        properties = []
        while True:
            if before:
                prop = self._parse_property_before()
            else:
                prop = self._parse_property()

            if not prop:
                break
            for p in ensure_list(prop):
                properties.append(p)

        if properties:
            return self.expression(exp.Properties, expressions=properties)

        return None

    def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty:
        """Parses [NO] FALLBACK [PROTECTION]."""
        return self.expression(
            exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION")
        )

    def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty:
        """Disambiguates VOLATILE: a table property right after CREATE/REPLACE/UNIQUE,
        otherwise a function stability marker."""
        if self._index >= 2:
            pre_volatile_token = self._tokens[self._index - 2]
        else:
            pre_volatile_token = None

        if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS:
            return exp.VolatileProperty()

        return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE"))

    def _parse_system_versioning_property(self) -> exp.WithSystemVersioningProperty:
        """Parses SYSTEM_VERSIONING [= ON] [(HISTORY_TABLE = ..., DATA_CONSISTENCY_CHECK = ...)]."""
        self._match_pair(TokenType.EQ, TokenType.ON)

        prop = self.expression(exp.WithSystemVersioningProperty)
        if self._match(TokenType.L_PAREN):
            self._match_text_seq("HISTORY_TABLE", "=")
            prop.set("this", self._parse_table_parts())

            if self._match(TokenType.COMMA):
                self._match_text_seq("DATA_CONSISTENCY_CHECK", "=")
                prop.set("expression", self._advance_any() and self._prev.text.upper())

            self._match_r_paren()

        return prop

    def _parse_with_property(
        self,
    ) -> t.Optional[exp.Expression] | t.List[exp.Expression]:
        """Parses the clause following WITH in DDL: a wrapped property list or one
        of the WITH JOURNAL / WITH [NO] DATA / isolated-loading forms."""
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_property)

        if self._match_text_seq("JOURNAL"):
            return self._parse_withjournaltable()

        if self._match_text_seq("DATA"):
            return self._parse_withdata(no=False)
        elif self._match_text_seq("NO", "DATA"):
            return self._parse_withdata(no=True)

        if not self._next:
            return None

        return self._parse_withisolatedloading()

    # https://dev.mysql.com/doc/refman/8.0/en/create-view.html
    def _parse_definer(self) -> t.Optional[exp.DefinerProperty]:
        """Parses DEFINER = user@host; returns None if either part is missing."""
        self._match(TokenType.EQ)

        user = self._parse_id_var()
        self._match(TokenType.PARAMETER)
        host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text)

        if not user or not host:
            return None

        return exp.DefinerProperty(this=f"{user}@{host}")

    def _parse_withjournaltable(self) -> exp.WithJournalTableProperty:
        """Parses WITH JOURNAL [TABLE] [=] <table>."""
        self._match(TokenType.TABLE)
        self._match(TokenType.EQ)
        return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts())

    def _parse_log(self, no: bool = False) -> exp.LogProperty:
        """Parses [NO] LOG."""
        return self.expression(exp.LogProperty, no=no)

    def _parse_journal(self, **kwargs) -> exp.JournalProperty:
        """Wraps the pre-parsed journal modifiers into a JournalProperty."""
        return self.expression(exp.JournalProperty, **kwargs)

    def _parse_checksum(self) -> exp.ChecksumProperty:
        """Parses CHECKSUM [=] {ON | OFF | DEFAULT}."""
        self._match(TokenType.EQ)

        on = None
        if self._match(TokenType.ON):
            on = True
        elif self._match_text_seq("OFF"):
            on = False

        return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT))

    def _parse_cluster(self) -> exp.Cluster:
        """Parses a comma-separated list of ordered expressions as a CLUSTER BY."""
        return self.expression(exp.Cluster, expressions=self._parse_csv(self._parse_ordered))

    def _parse_clustered_by(self) ->
exp.ClusteredByProperty: 1607 self._match_text_seq("BY") 1608 1609 self._match_l_paren() 1610 expressions = self._parse_csv(self._parse_column) 1611 self._match_r_paren() 1612 1613 if self._match_text_seq("SORTED", "BY"): 1614 self._match_l_paren() 1615 sorted_by = self._parse_csv(self._parse_ordered) 1616 self._match_r_paren() 1617 else: 1618 sorted_by = None 1619 1620 self._match(TokenType.INTO) 1621 buckets = self._parse_number() 1622 self._match_text_seq("BUCKETS") 1623 1624 return self.expression( 1625 exp.ClusteredByProperty, 1626 expressions=expressions, 1627 sorted_by=sorted_by, 1628 buckets=buckets, 1629 ) 1630 1631 def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]: 1632 if not self._match_text_seq("GRANTS"): 1633 self._retreat(self._index - 1) 1634 return None 1635 1636 return self.expression(exp.CopyGrantsProperty) 1637 1638 def _parse_freespace(self) -> exp.FreespaceProperty: 1639 self._match(TokenType.EQ) 1640 return self.expression( 1641 exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT) 1642 ) 1643 1644 def _parse_mergeblockratio( 1645 self, no: bool = False, default: bool = False 1646 ) -> exp.MergeBlockRatioProperty: 1647 if self._match(TokenType.EQ): 1648 return self.expression( 1649 exp.MergeBlockRatioProperty, 1650 this=self._parse_number(), 1651 percent=self._match(TokenType.PERCENT), 1652 ) 1653 1654 return self.expression(exp.MergeBlockRatioProperty, no=no, default=default) 1655 1656 def _parse_datablocksize( 1657 self, 1658 default: t.Optional[bool] = None, 1659 minimum: t.Optional[bool] = None, 1660 maximum: t.Optional[bool] = None, 1661 ) -> exp.DataBlocksizeProperty: 1662 self._match(TokenType.EQ) 1663 size = self._parse_number() 1664 1665 units = None 1666 if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")): 1667 units = self._prev.text 1668 1669 return self.expression( 1670 exp.DataBlocksizeProperty, 1671 size=size, 1672 units=units, 1673 default=default, 1674 minimum=minimum, 
1675 maximum=maximum, 1676 ) 1677 1678 def _parse_blockcompression(self) -> exp.BlockCompressionProperty: 1679 self._match(TokenType.EQ) 1680 always = self._match_text_seq("ALWAYS") 1681 manual = self._match_text_seq("MANUAL") 1682 never = self._match_text_seq("NEVER") 1683 default = self._match_text_seq("DEFAULT") 1684 1685 autotemp = None 1686 if self._match_text_seq("AUTOTEMP"): 1687 autotemp = self._parse_schema() 1688 1689 return self.expression( 1690 exp.BlockCompressionProperty, 1691 always=always, 1692 manual=manual, 1693 never=never, 1694 default=default, 1695 autotemp=autotemp, 1696 ) 1697 1698 def _parse_withisolatedloading(self) -> exp.IsolatedLoadingProperty: 1699 no = self._match_text_seq("NO") 1700 concurrent = self._match_text_seq("CONCURRENT") 1701 self._match_text_seq("ISOLATED", "LOADING") 1702 for_all = self._match_text_seq("FOR", "ALL") 1703 for_insert = self._match_text_seq("FOR", "INSERT") 1704 for_none = self._match_text_seq("FOR", "NONE") 1705 return self.expression( 1706 exp.IsolatedLoadingProperty, 1707 no=no, 1708 concurrent=concurrent, 1709 for_all=for_all, 1710 for_insert=for_insert, 1711 for_none=for_none, 1712 ) 1713 1714 def _parse_locking(self) -> exp.LockingProperty: 1715 if self._match(TokenType.TABLE): 1716 kind = "TABLE" 1717 elif self._match(TokenType.VIEW): 1718 kind = "VIEW" 1719 elif self._match(TokenType.ROW): 1720 kind = "ROW" 1721 elif self._match_text_seq("DATABASE"): 1722 kind = "DATABASE" 1723 else: 1724 kind = None 1725 1726 if kind in ("DATABASE", "TABLE", "VIEW"): 1727 this = self._parse_table_parts() 1728 else: 1729 this = None 1730 1731 if self._match(TokenType.FOR): 1732 for_or_in = "FOR" 1733 elif self._match(TokenType.IN): 1734 for_or_in = "IN" 1735 else: 1736 for_or_in = None 1737 1738 if self._match_text_seq("ACCESS"): 1739 lock_type = "ACCESS" 1740 elif self._match_texts(("EXCL", "EXCLUSIVE")): 1741 lock_type = "EXCLUSIVE" 1742 elif self._match_text_seq("SHARE"): 1743 lock_type = "SHARE" 1744 elif 
self._match_text_seq("READ"): 1745 lock_type = "READ" 1746 elif self._match_text_seq("WRITE"): 1747 lock_type = "WRITE" 1748 elif self._match_text_seq("CHECKSUM"): 1749 lock_type = "CHECKSUM" 1750 else: 1751 lock_type = None 1752 1753 override = self._match_text_seq("OVERRIDE") 1754 1755 return self.expression( 1756 exp.LockingProperty, 1757 this=this, 1758 kind=kind, 1759 for_or_in=for_or_in, 1760 lock_type=lock_type, 1761 override=override, 1762 ) 1763 1764 def _parse_partition_by(self) -> t.List[exp.Expression]: 1765 if self._match(TokenType.PARTITION_BY): 1766 return self._parse_csv(self._parse_conjunction) 1767 return [] 1768 1769 def _parse_partition_bound_spec(self) -> exp.PartitionBoundSpec: 1770 def _parse_partition_bound_expr() -> t.Optional[exp.Expression]: 1771 if self._match_text_seq("MINVALUE"): 1772 return exp.var("MINVALUE") 1773 if self._match_text_seq("MAXVALUE"): 1774 return exp.var("MAXVALUE") 1775 return self._parse_bitwise() 1776 1777 this: t.Optional[exp.Expression | t.List[exp.Expression]] = None 1778 expression = None 1779 from_expressions = None 1780 to_expressions = None 1781 1782 if self._match(TokenType.IN): 1783 this = self._parse_wrapped_csv(self._parse_bitwise) 1784 elif self._match(TokenType.FROM): 1785 from_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 1786 self._match_text_seq("TO") 1787 to_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 1788 elif self._match_text_seq("WITH", "(", "MODULUS"): 1789 this = self._parse_number() 1790 self._match_text_seq(",", "REMAINDER") 1791 expression = self._parse_number() 1792 self._match_r_paren() 1793 else: 1794 self.raise_error("Failed to parse partition bound spec.") 1795 1796 return self.expression( 1797 exp.PartitionBoundSpec, 1798 this=this, 1799 expression=expression, 1800 from_expressions=from_expressions, 1801 to_expressions=to_expressions, 1802 ) 1803 1804 # https://www.postgresql.org/docs/current/sql-createtable.html 1805 def 
_parse_partitioned_of(self) -> t.Optional[exp.PartitionedOfProperty]: 1806 if not self._match_text_seq("OF"): 1807 self._retreat(self._index - 1) 1808 return None 1809 1810 this = self._parse_table(schema=True) 1811 1812 if self._match(TokenType.DEFAULT): 1813 expression: exp.Var | exp.PartitionBoundSpec = exp.var("DEFAULT") 1814 elif self._match_text_seq("FOR", "VALUES"): 1815 expression = self._parse_partition_bound_spec() 1816 else: 1817 self.raise_error("Expecting either DEFAULT or FOR VALUES clause.") 1818 1819 return self.expression(exp.PartitionedOfProperty, this=this, expression=expression) 1820 1821 def _parse_partitioned_by(self) -> exp.PartitionedByProperty: 1822 self._match(TokenType.EQ) 1823 return self.expression( 1824 exp.PartitionedByProperty, 1825 this=self._parse_schema() or self._parse_bracket(self._parse_field()), 1826 ) 1827 1828 def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty: 1829 if self._match_text_seq("AND", "STATISTICS"): 1830 statistics = True 1831 elif self._match_text_seq("AND", "NO", "STATISTICS"): 1832 statistics = False 1833 else: 1834 statistics = None 1835 1836 return self.expression(exp.WithDataProperty, no=no, statistics=statistics) 1837 1838 def _parse_no_property(self) -> t.Optional[exp.NoPrimaryIndexProperty]: 1839 if self._match_text_seq("PRIMARY", "INDEX"): 1840 return exp.NoPrimaryIndexProperty() 1841 return None 1842 1843 def _parse_on_property(self) -> t.Optional[exp.Expression]: 1844 if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"): 1845 return exp.OnCommitProperty() 1846 if self._match_text_seq("COMMIT", "DELETE", "ROWS"): 1847 return exp.OnCommitProperty(delete=True) 1848 return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var())) 1849 1850 def _parse_distkey(self) -> exp.DistKeyProperty: 1851 return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var)) 1852 1853 def _parse_create_like(self) -> t.Optional[exp.LikeProperty]: 1854 table = 
self._parse_table(schema=True) 1855 1856 options = [] 1857 while self._match_texts(("INCLUDING", "EXCLUDING")): 1858 this = self._prev.text.upper() 1859 1860 id_var = self._parse_id_var() 1861 if not id_var: 1862 return None 1863 1864 options.append( 1865 self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper())) 1866 ) 1867 1868 return self.expression(exp.LikeProperty, this=table, expressions=options) 1869 1870 def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty: 1871 return self.expression( 1872 exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound 1873 ) 1874 1875 def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty: 1876 self._match(TokenType.EQ) 1877 return self.expression( 1878 exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default 1879 ) 1880 1881 def _parse_remote_with_connection(self) -> exp.RemoteWithConnectionModelProperty: 1882 self._match_text_seq("WITH", "CONNECTION") 1883 return self.expression( 1884 exp.RemoteWithConnectionModelProperty, this=self._parse_table_parts() 1885 ) 1886 1887 def _parse_returns(self) -> exp.ReturnsProperty: 1888 value: t.Optional[exp.Expression] 1889 is_table = self._match(TokenType.TABLE) 1890 1891 if is_table: 1892 if self._match(TokenType.LT): 1893 value = self.expression( 1894 exp.Schema, 1895 this="TABLE", 1896 expressions=self._parse_csv(self._parse_struct_types), 1897 ) 1898 if not self._match(TokenType.GT): 1899 self.raise_error("Expecting >") 1900 else: 1901 value = self._parse_schema(exp.var("TABLE")) 1902 else: 1903 value = self._parse_types() 1904 1905 return self.expression(exp.ReturnsProperty, this=value, is_table=is_table) 1906 1907 def _parse_describe(self) -> exp.Describe: 1908 kind = self._match_set(self.CREATABLES) and self._prev.text 1909 this = self._parse_table(schema=True) 1910 properties = self._parse_properties() 1911 expressions = properties.expressions if properties else None 1912 return 
self.expression(exp.Describe, this=this, kind=kind, expressions=expressions) 1913 1914 def _parse_insert(self) -> exp.Insert: 1915 comments = ensure_list(self._prev_comments) 1916 overwrite = self._match(TokenType.OVERWRITE) 1917 ignore = self._match(TokenType.IGNORE) 1918 local = self._match_text_seq("LOCAL") 1919 alternative = None 1920 1921 if self._match_text_seq("DIRECTORY"): 1922 this: t.Optional[exp.Expression] = self.expression( 1923 exp.Directory, 1924 this=self._parse_var_or_string(), 1925 local=local, 1926 row_format=self._parse_row_format(match_row=True), 1927 ) 1928 else: 1929 if self._match(TokenType.OR): 1930 alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text 1931 1932 self._match(TokenType.INTO) 1933 comments += ensure_list(self._prev_comments) 1934 self._match(TokenType.TABLE) 1935 this = self._parse_table(schema=True) 1936 1937 returning = self._parse_returning() 1938 1939 return self.expression( 1940 exp.Insert, 1941 comments=comments, 1942 this=this, 1943 by_name=self._match_text_seq("BY", "NAME"), 1944 exists=self._parse_exists(), 1945 partition=self._parse_partition(), 1946 where=self._match_pair(TokenType.REPLACE, TokenType.WHERE) 1947 and self._parse_conjunction(), 1948 expression=self._parse_ddl_select(), 1949 conflict=self._parse_on_conflict(), 1950 returning=returning or self._parse_returning(), 1951 overwrite=overwrite, 1952 alternative=alternative, 1953 ignore=ignore, 1954 ) 1955 1956 def _parse_kill(self) -> exp.Kill: 1957 kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None 1958 1959 return self.expression( 1960 exp.Kill, 1961 this=self._parse_primary(), 1962 kind=kind, 1963 ) 1964 1965 def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]: 1966 conflict = self._match_text_seq("ON", "CONFLICT") 1967 duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY") 1968 1969 if not conflict and not duplicate: 1970 return None 1971 1972 nothing = None 1973 expressions = None 
1974 key = None 1975 constraint = None 1976 1977 if conflict: 1978 if self._match_text_seq("ON", "CONSTRAINT"): 1979 constraint = self._parse_id_var() 1980 else: 1981 key = self._parse_csv(self._parse_value) 1982 1983 self._match_text_seq("DO") 1984 if self._match_text_seq("NOTHING"): 1985 nothing = True 1986 else: 1987 self._match(TokenType.UPDATE) 1988 self._match(TokenType.SET) 1989 expressions = self._parse_csv(self._parse_equality) 1990 1991 return self.expression( 1992 exp.OnConflict, 1993 duplicate=duplicate, 1994 expressions=expressions, 1995 nothing=nothing, 1996 key=key, 1997 constraint=constraint, 1998 ) 1999 2000 def _parse_returning(self) -> t.Optional[exp.Returning]: 2001 if not self._match(TokenType.RETURNING): 2002 return None 2003 return self.expression( 2004 exp.Returning, 2005 expressions=self._parse_csv(self._parse_expression), 2006 into=self._match(TokenType.INTO) and self._parse_table_part(), 2007 ) 2008 2009 def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 2010 if not self._match(TokenType.FORMAT): 2011 return None 2012 return self._parse_row_format() 2013 2014 def _parse_row_format( 2015 self, match_row: bool = False 2016 ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 2017 if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT): 2018 return None 2019 2020 if self._match_text_seq("SERDE"): 2021 this = self._parse_string() 2022 2023 serde_properties = None 2024 if self._match(TokenType.SERDE_PROPERTIES): 2025 serde_properties = self.expression( 2026 exp.SerdeProperties, expressions=self._parse_wrapped_csv(self._parse_property) 2027 ) 2028 2029 return self.expression( 2030 exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties 2031 ) 2032 2033 self._match_text_seq("DELIMITED") 2034 2035 kwargs = {} 2036 2037 if self._match_text_seq("FIELDS", "TERMINATED", "BY"): 2038 kwargs["fields"] = self._parse_string() 2039 if 
self._match_text_seq("ESCAPED", "BY"): 2040 kwargs["escaped"] = self._parse_string() 2041 if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"): 2042 kwargs["collection_items"] = self._parse_string() 2043 if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"): 2044 kwargs["map_keys"] = self._parse_string() 2045 if self._match_text_seq("LINES", "TERMINATED", "BY"): 2046 kwargs["lines"] = self._parse_string() 2047 if self._match_text_seq("NULL", "DEFINED", "AS"): 2048 kwargs["null"] = self._parse_string() 2049 2050 return self.expression(exp.RowFormatDelimitedProperty, **kwargs) # type: ignore 2051 2052 def _parse_load(self) -> exp.LoadData | exp.Command: 2053 if self._match_text_seq("DATA"): 2054 local = self._match_text_seq("LOCAL") 2055 self._match_text_seq("INPATH") 2056 inpath = self._parse_string() 2057 overwrite = self._match(TokenType.OVERWRITE) 2058 self._match_pair(TokenType.INTO, TokenType.TABLE) 2059 2060 return self.expression( 2061 exp.LoadData, 2062 this=self._parse_table(schema=True), 2063 local=local, 2064 overwrite=overwrite, 2065 inpath=inpath, 2066 partition=self._parse_partition(), 2067 input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(), 2068 serde=self._match_text_seq("SERDE") and self._parse_string(), 2069 ) 2070 return self._parse_as_command(self._prev) 2071 2072 def _parse_delete(self) -> exp.Delete: 2073 # This handles MySQL's "Multiple-Table Syntax" 2074 # https://dev.mysql.com/doc/refman/8.0/en/delete.html 2075 tables = None 2076 comments = self._prev_comments 2077 if not self._match(TokenType.FROM, advance=False): 2078 tables = self._parse_csv(self._parse_table) or None 2079 2080 returning = self._parse_returning() 2081 2082 return self.expression( 2083 exp.Delete, 2084 comments=comments, 2085 tables=tables, 2086 this=self._match(TokenType.FROM) and self._parse_table(joins=True), 2087 using=self._match(TokenType.USING) and self._parse_table(joins=True), 2088 where=self._parse_where(), 2089 
returning=returning or self._parse_returning(), 2090 limit=self._parse_limit(), 2091 ) 2092 2093 def _parse_update(self) -> exp.Update: 2094 comments = self._prev_comments 2095 this = self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS) 2096 expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality) 2097 returning = self._parse_returning() 2098 return self.expression( 2099 exp.Update, 2100 comments=comments, 2101 **{ # type: ignore 2102 "this": this, 2103 "expressions": expressions, 2104 "from": self._parse_from(joins=True), 2105 "where": self._parse_where(), 2106 "returning": returning or self._parse_returning(), 2107 "order": self._parse_order(), 2108 "limit": self._parse_limit(), 2109 }, 2110 ) 2111 2112 def _parse_uncache(self) -> exp.Uncache: 2113 if not self._match(TokenType.TABLE): 2114 self.raise_error("Expecting TABLE after UNCACHE") 2115 2116 return self.expression( 2117 exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True) 2118 ) 2119 2120 def _parse_cache(self) -> exp.Cache: 2121 lazy = self._match_text_seq("LAZY") 2122 self._match(TokenType.TABLE) 2123 table = self._parse_table(schema=True) 2124 2125 options = [] 2126 if self._match_text_seq("OPTIONS"): 2127 self._match_l_paren() 2128 k = self._parse_string() 2129 self._match(TokenType.EQ) 2130 v = self._parse_string() 2131 options = [k, v] 2132 self._match_r_paren() 2133 2134 self._match(TokenType.ALIAS) 2135 return self.expression( 2136 exp.Cache, 2137 this=table, 2138 lazy=lazy, 2139 options=options, 2140 expression=self._parse_select(nested=True), 2141 ) 2142 2143 def _parse_partition(self) -> t.Optional[exp.Partition]: 2144 if not self._match(TokenType.PARTITION): 2145 return None 2146 2147 return self.expression( 2148 exp.Partition, expressions=self._parse_wrapped_csv(self._parse_conjunction) 2149 ) 2150 2151 def _parse_value(self) -> exp.Tuple: 2152 if self._match(TokenType.L_PAREN): 2153 expressions = 
self._parse_csv(self._parse_conjunction) 2154 self._match_r_paren() 2155 return self.expression(exp.Tuple, expressions=expressions) 2156 2157 # In presto we can have VALUES 1, 2 which results in 1 column & 2 rows. 2158 # https://prestodb.io/docs/current/sql/values.html 2159 return self.expression(exp.Tuple, expressions=[self._parse_conjunction()]) 2160 2161 def _parse_projections(self) -> t.List[exp.Expression]: 2162 return self._parse_expressions() 2163 2164 def _parse_select( 2165 self, nested: bool = False, table: bool = False, parse_subquery_alias: bool = True 2166 ) -> t.Optional[exp.Expression]: 2167 cte = self._parse_with() 2168 2169 if cte: 2170 this = self._parse_statement() 2171 2172 if not this: 2173 self.raise_error("Failed to parse any statement following CTE") 2174 return cte 2175 2176 if "with" in this.arg_types: 2177 this.set("with", cte) 2178 else: 2179 self.raise_error(f"{this.key} does not support CTE") 2180 this = cte 2181 2182 return this 2183 2184 # duckdb supports leading with FROM x 2185 from_ = self._parse_from() if self._match(TokenType.FROM, advance=False) else None 2186 2187 if self._match(TokenType.SELECT): 2188 comments = self._prev_comments 2189 2190 hint = self._parse_hint() 2191 all_ = self._match(TokenType.ALL) 2192 distinct = self._match_set(self.DISTINCT_TOKENS) 2193 2194 kind = ( 2195 self._match(TokenType.ALIAS) 2196 and self._match_texts(("STRUCT", "VALUE")) 2197 and self._prev.text 2198 ) 2199 2200 if distinct: 2201 distinct = self.expression( 2202 exp.Distinct, 2203 on=self._parse_value() if self._match(TokenType.ON) else None, 2204 ) 2205 2206 if all_ and distinct: 2207 self.raise_error("Cannot specify both ALL and DISTINCT after SELECT") 2208 2209 limit = self._parse_limit(top=True) 2210 projections = self._parse_projections() 2211 2212 this = self.expression( 2213 exp.Select, 2214 kind=kind, 2215 hint=hint, 2216 distinct=distinct, 2217 expressions=projections, 2218 limit=limit, 2219 ) 2220 this.comments = comments 2221 
2222 into = self._parse_into() 2223 if into: 2224 this.set("into", into) 2225 2226 if not from_: 2227 from_ = self._parse_from() 2228 2229 if from_: 2230 this.set("from", from_) 2231 2232 this = self._parse_query_modifiers(this) 2233 elif (table or nested) and self._match(TokenType.L_PAREN): 2234 if self._match(TokenType.PIVOT): 2235 this = self._parse_simplified_pivot() 2236 elif self._match(TokenType.FROM): 2237 this = exp.select("*").from_( 2238 t.cast(exp.From, self._parse_from(skip_from_token=True)) 2239 ) 2240 else: 2241 this = self._parse_table() if table else self._parse_select(nested=True) 2242 this = self._parse_set_operations(self._parse_query_modifiers(this)) 2243 2244 self._match_r_paren() 2245 2246 # We return early here so that the UNION isn't attached to the subquery by the 2247 # following call to _parse_set_operations, but instead becomes the parent node 2248 return self._parse_subquery(this, parse_alias=parse_subquery_alias) 2249 elif self._match(TokenType.VALUES): 2250 this = self.expression( 2251 exp.Values, 2252 expressions=self._parse_csv(self._parse_value), 2253 alias=self._parse_table_alias(), 2254 ) 2255 elif from_: 2256 this = exp.select("*").from_(from_.this, copy=False) 2257 else: 2258 this = None 2259 2260 return self._parse_set_operations(this) 2261 2262 def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]: 2263 if not skip_with_token and not self._match(TokenType.WITH): 2264 return None 2265 2266 comments = self._prev_comments 2267 recursive = self._match(TokenType.RECURSIVE) 2268 2269 expressions = [] 2270 while True: 2271 expressions.append(self._parse_cte()) 2272 2273 if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH): 2274 break 2275 else: 2276 self._match(TokenType.WITH) 2277 2278 return self.expression( 2279 exp.With, comments=comments, expressions=expressions, recursive=recursive 2280 ) 2281 2282 def _parse_cte(self) -> exp.CTE: 2283 alias = self._parse_table_alias() 2284 if not 
alias or not alias.this: 2285 self.raise_error("Expected CTE to have alias") 2286 2287 self._match(TokenType.ALIAS) 2288 return self.expression( 2289 exp.CTE, this=self._parse_wrapped(self._parse_statement), alias=alias 2290 ) 2291 2292 def _parse_table_alias( 2293 self, alias_tokens: t.Optional[t.Collection[TokenType]] = None 2294 ) -> t.Optional[exp.TableAlias]: 2295 any_token = self._match(TokenType.ALIAS) 2296 alias = ( 2297 self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 2298 or self._parse_string_as_identifier() 2299 ) 2300 2301 index = self._index 2302 if self._match(TokenType.L_PAREN): 2303 columns = self._parse_csv(self._parse_function_parameter) 2304 self._match_r_paren() if columns else self._retreat(index) 2305 else: 2306 columns = None 2307 2308 if not alias and not columns: 2309 return None 2310 2311 return self.expression(exp.TableAlias, this=alias, columns=columns) 2312 2313 def _parse_subquery( 2314 self, this: t.Optional[exp.Expression], parse_alias: bool = True 2315 ) -> t.Optional[exp.Subquery]: 2316 if not this: 2317 return None 2318 2319 return self.expression( 2320 exp.Subquery, 2321 this=this, 2322 pivots=self._parse_pivots(), 2323 alias=self._parse_table_alias() if parse_alias else None, 2324 ) 2325 2326 def _parse_query_modifiers( 2327 self, this: t.Optional[exp.Expression] 2328 ) -> t.Optional[exp.Expression]: 2329 if isinstance(this, self.MODIFIABLES): 2330 for join in iter(self._parse_join, None): 2331 this.append("joins", join) 2332 for lateral in iter(self._parse_lateral, None): 2333 this.append("laterals", lateral) 2334 2335 while True: 2336 if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False): 2337 parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type] 2338 key, expression = parser(self) 2339 2340 if expression: 2341 this.set(key, expression) 2342 if key == "limit": 2343 offset = expression.args.pop("offset", None) 2344 if offset: 2345 this.set("offset", 
exp.Offset(expression=offset)) 2346 continue 2347 break 2348 return this 2349 2350 def _parse_hint(self) -> t.Optional[exp.Hint]: 2351 if self._match(TokenType.HINT): 2352 hints = [] 2353 for hint in iter(lambda: self._parse_csv(self._parse_function), []): 2354 hints.extend(hint) 2355 2356 if not self._match_pair(TokenType.STAR, TokenType.SLASH): 2357 self.raise_error("Expected */ after HINT") 2358 2359 return self.expression(exp.Hint, expressions=hints) 2360 2361 return None 2362 2363 def _parse_into(self) -> t.Optional[exp.Into]: 2364 if not self._match(TokenType.INTO): 2365 return None 2366 2367 temp = self._match(TokenType.TEMPORARY) 2368 unlogged = self._match_text_seq("UNLOGGED") 2369 self._match(TokenType.TABLE) 2370 2371 return self.expression( 2372 exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged 2373 ) 2374 2375 def _parse_from( 2376 self, joins: bool = False, skip_from_token: bool = False 2377 ) -> t.Optional[exp.From]: 2378 if not skip_from_token and not self._match(TokenType.FROM): 2379 return None 2380 2381 return self.expression( 2382 exp.From, comments=self._prev_comments, this=self._parse_table(joins=joins) 2383 ) 2384 2385 def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]: 2386 if not self._match(TokenType.MATCH_RECOGNIZE): 2387 return None 2388 2389 self._match_l_paren() 2390 2391 partition = self._parse_partition_by() 2392 order = self._parse_order() 2393 measures = self._parse_expressions() if self._match_text_seq("MEASURES") else None 2394 2395 if self._match_text_seq("ONE", "ROW", "PER", "MATCH"): 2396 rows = exp.var("ONE ROW PER MATCH") 2397 elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"): 2398 text = "ALL ROWS PER MATCH" 2399 if self._match_text_seq("SHOW", "EMPTY", "MATCHES"): 2400 text += f" SHOW EMPTY MATCHES" 2401 elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"): 2402 text += f" OMIT EMPTY MATCHES" 2403 elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"): 2404 text += f" 
WITH UNMATCHED ROWS" 2405 rows = exp.var(text) 2406 else: 2407 rows = None 2408 2409 if self._match_text_seq("AFTER", "MATCH", "SKIP"): 2410 text = "AFTER MATCH SKIP" 2411 if self._match_text_seq("PAST", "LAST", "ROW"): 2412 text += f" PAST LAST ROW" 2413 elif self._match_text_seq("TO", "NEXT", "ROW"): 2414 text += f" TO NEXT ROW" 2415 elif self._match_text_seq("TO", "FIRST"): 2416 text += f" TO FIRST {self._advance_any().text}" # type: ignore 2417 elif self._match_text_seq("TO", "LAST"): 2418 text += f" TO LAST {self._advance_any().text}" # type: ignore 2419 after = exp.var(text) 2420 else: 2421 after = None 2422 2423 if self._match_text_seq("PATTERN"): 2424 self._match_l_paren() 2425 2426 if not self._curr: 2427 self.raise_error("Expecting )", self._curr) 2428 2429 paren = 1 2430 start = self._curr 2431 2432 while self._curr and paren > 0: 2433 if self._curr.token_type == TokenType.L_PAREN: 2434 paren += 1 2435 if self._curr.token_type == TokenType.R_PAREN: 2436 paren -= 1 2437 2438 end = self._prev 2439 self._advance() 2440 2441 if paren > 0: 2442 self.raise_error("Expecting )", self._curr) 2443 2444 pattern = exp.var(self._find_sql(start, end)) 2445 else: 2446 pattern = None 2447 2448 define = ( 2449 self._parse_csv( 2450 lambda: self.expression( 2451 exp.Alias, 2452 alias=self._parse_id_var(any_token=True), 2453 this=self._match(TokenType.ALIAS) and self._parse_conjunction(), 2454 ) 2455 ) 2456 if self._match_text_seq("DEFINE") 2457 else None 2458 ) 2459 2460 self._match_r_paren() 2461 2462 return self.expression( 2463 exp.MatchRecognize, 2464 partition_by=partition, 2465 order=order, 2466 measures=measures, 2467 rows=rows, 2468 after=after, 2469 pattern=pattern, 2470 define=define, 2471 alias=self._parse_table_alias(), 2472 ) 2473 2474 def _parse_lateral(self) -> t.Optional[exp.Lateral]: 2475 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY) 2476 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY) 2477 2478 if outer_apply or 
cross_apply: 2479 this = self._parse_select(table=True) 2480 view = None 2481 outer = not cross_apply 2482 elif self._match(TokenType.LATERAL): 2483 this = self._parse_select(table=True) 2484 view = self._match(TokenType.VIEW) 2485 outer = self._match(TokenType.OUTER) 2486 else: 2487 return None 2488 2489 if not this: 2490 this = ( 2491 self._parse_unnest() 2492 or self._parse_function() 2493 or self._parse_id_var(any_token=False) 2494 ) 2495 2496 while self._match(TokenType.DOT): 2497 this = exp.Dot( 2498 this=this, 2499 expression=self._parse_function() or self._parse_id_var(any_token=False), 2500 ) 2501 2502 if view: 2503 table = self._parse_id_var(any_token=False) 2504 columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else [] 2505 table_alias: t.Optional[exp.TableAlias] = self.expression( 2506 exp.TableAlias, this=table, columns=columns 2507 ) 2508 elif isinstance(this, (exp.Subquery, exp.Unnest)) and this.alias: 2509 # We move the alias from the lateral's child node to the lateral itself 2510 table_alias = this.args["alias"].pop() 2511 else: 2512 table_alias = self._parse_table_alias() 2513 2514 return self.expression(exp.Lateral, this=this, view=view, outer=outer, alias=table_alias) 2515 2516 def _parse_join_parts( 2517 self, 2518 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 2519 return ( 2520 self._match_set(self.JOIN_METHODS) and self._prev, 2521 self._match_set(self.JOIN_SIDES) and self._prev, 2522 self._match_set(self.JOIN_KINDS) and self._prev, 2523 ) 2524 2525 def _parse_join( 2526 self, skip_join_token: bool = False, parse_bracket: bool = False 2527 ) -> t.Optional[exp.Join]: 2528 if self._match(TokenType.COMMA): 2529 return self.expression(exp.Join, this=self._parse_table()) 2530 2531 index = self._index 2532 method, side, kind = self._parse_join_parts() 2533 hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None 2534 join = self._match(TokenType.JOIN) 2535 2536 if not 
    def _parse_opclass(self) -> t.Optional[exp.Expression]:
        """Parse an expression optionally followed by a Postgres operator class.

        If the next token is a keyword/token that may follow an opclass, the
        plain expression is returned; otherwise the trailing name is wrapped in
        an exp.Opclass node.
        """
        this = self._parse_conjunction()

        if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False):
            return this

        if not self._match_set(self.OPTYPE_FOLLOW_TOKENS, advance=False):
            return self.expression(exp.Opclass, this=this, expression=self._parse_table_parts())

        return this

    def _parse_index(
        self,
        index: t.Optional[exp.Expression] = None,
    ) -> t.Optional[exp.Index]:
        """Parse an index definition into an exp.Index.

        Args:
            index: when given, the index name was already parsed (e.g. in a
                CREATE INDEX statement) and only the `ON <table>` part follows;
                otherwise the UNIQUE/PRIMARY/AMP prefix and INDEX keyword are
                expected here.
        """
        if index:
            unique = None
            primary = None
            amp = None

            self._match(TokenType.ON)
            self._match(TokenType.TABLE)  # hive
            table = self._parse_table_parts(schema=True)
        else:
            unique = self._match(TokenType.UNIQUE)
            primary = self._match_text_seq("PRIMARY")
            amp = self._match_text_seq("AMP")

            if not self._match(TokenType.INDEX):
                return None

            index = self._parse_id_var()
            table = None

        using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None

        if self._match(TokenType.L_PAREN, advance=False):
            # Indexed columns, each with optional ordering and operator class.
            columns = self._parse_wrapped_csv(lambda: self._parse_ordered(self._parse_opclass))
        else:
            columns = None

        return self.expression(
            exp.Index,
            this=index,
            table=table,
            using=using,
            columns=columns,
            unique=unique,
            primary=primary,
            amp=amp,
            partition_by=self._parse_partition_by(),
            where=self._parse_where(),
        )

    def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]:
        """Parse T-SQL `WITH (...)` or MySQL USE/FORCE/IGNORE INDEX table hints."""
        hints: t.List[exp.Expression] = []

        if self._match_pair(TokenType.WITH, TokenType.L_PAREN):
            # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16
            hints.append(
                self.expression(
                    exp.WithTableHint,
                    expressions=self._parse_csv(
                        lambda: self._parse_function() or self._parse_var(any_token=True)
                    ),
                )
            )
            self._match_r_paren()
        else:
            # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html
            while self._match_set(self.TABLE_INDEX_HINT_TOKENS):
                hint = exp.IndexTableHint(this=self._prev.text.upper())

                self._match_texts({"INDEX", "KEY"})
                if self._match(TokenType.FOR):
                    hint.set("target", self._advance_any() and self._prev.text.upper())

                hint.set("expressions", self._parse_wrapped_id_vars())
                hints.append(hint)

        return hints or None
    def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]:
        """Parse one dotted component of a table name (function call, identifier,
        quoted string, or placeholder). Function calls are disallowed when
        parsing a schema, where parts must be plain names."""
        return (
            (not schema and self._parse_function(optional_parens=False))
            or self._parse_id_var(any_token=False)
            or self._parse_string_as_identifier()
            or self._parse_placeholder()
        )

    def _parse_table_parts(self, schema: bool = False) -> exp.Table:
        """Parse a possibly qualified table name (catalog.db.table, with deeper
        nesting represented as exp.Dot) into an exp.Table.

        Raises a parse error if no table name can be parsed.
        """
        catalog = None
        db = None
        table = self._parse_table_part(schema=schema)

        while self._match(TokenType.DOT):
            if catalog:
                # This allows nesting the table in arbitrarily many dot expressions if needed
                table = self.expression(
                    exp.Dot, this=table, expression=self._parse_table_part(schema=schema)
                )
            else:
                catalog = db
                db = table
                table = self._parse_table_part(schema=schema)

        if not table:
            self.raise_error(f"Expected table name but got {self._curr}")

        return self.expression(
            exp.Table, this=table, db=db, catalog=catalog, pivots=self._parse_pivots()
        )

    def _parse_table(
        self,
        schema: bool = False,
        joins: bool = False,
        alias_tokens: t.Optional[t.Collection[TokenType]] = None,
        parse_bracket: bool = False,
    ) -> t.Optional[exp.Expression]:
        """Parse any table factor: lateral, UNNEST, VALUES, subquery or a plain
        (possibly sampled/pivoted/hinted/aliased) table reference.

        Args:
            schema: parse the table as a schema target (column defs may follow).
            joins: also consume any trailing join clauses onto the table.
            alias_tokens: token types allowed as the alias; defaults to
                TABLE_ALIAS_TOKENS.
            parse_bracket: allow a leading bracket construct as the table.
        """
        lateral = self._parse_lateral()
        if lateral:
            return lateral

        unnest = self._parse_unnest()
        if unnest:
            return unnest

        values = self._parse_derived_table_values()
        if values:
            return values

        subquery = self._parse_select(table=True)
        if subquery:
            if not subquery.args.get("pivots"):
                subquery.set("pivots", self._parse_pivots())
            return subquery

        bracket = parse_bracket and self._parse_bracket(None)
        bracket = self.expression(exp.Table, this=bracket) if bracket else None
        this = t.cast(
            exp.Expression, bracket or self._parse_bracket(self._parse_table_parts(schema=schema))
        )

        if schema:
            return self._parse_schema(this=this)

        version = self._parse_version()

        if version:
            this.set("version", version)

        # Some dialects place TABLESAMPLE before the alias; `table_sample` is
        # assigned on exactly one of the two branches below.
        if self.ALIAS_POST_TABLESAMPLE:
            table_sample = self._parse_table_sample()

        alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
        if alias:
            this.set("alias", alias)

        if self._match_text_seq("AT"):
            this.set("index", self._parse_id_var())

        this.set("hints", self._parse_table_hints())

        if not this.args.get("pivots"):
            this.set("pivots", self._parse_pivots())

        if not self.ALIAS_POST_TABLESAMPLE:
            table_sample = self._parse_table_sample()

        if table_sample:
            # The sample node wraps the table it samples.
            table_sample.set("this", this)
            this = table_sample

        if joins:
            for join in iter(self._parse_join, None):
                this.append("joins", join)

        if self._match_pair(TokenType.WITH, TokenType.ORDINALITY):
            this.set("ordinality", True)
            this.set("alias", self._parse_table_alias())

        return this

    def _parse_version(self) -> t.Optional[exp.Version]:
        """Parse a temporal table qualifier (FOR SYSTEM_TIME / VERSION style):
        TIMESTAMP|VERSION + AS OF / FROM..TO / BETWEEN..AND / CONTAINED IN / ALL.
        """
        if self._match(TokenType.TIMESTAMP_SNAPSHOT):
            this = "TIMESTAMP"
        elif self._match(TokenType.VERSION_SNAPSHOT):
            this = "VERSION"
        else:
            return None

        if self._match_set((TokenType.FROM, TokenType.BETWEEN)):
            kind = self._prev.text.upper()
            start = self._parse_bitwise()
            self._match_texts(("TO", "AND"))
            end = self._parse_bitwise()
            expression: t.Optional[exp.Expression] = self.expression(
                exp.Tuple, expressions=[start, end]
            )
        elif self._match_text_seq("CONTAINED", "IN"):
            kind = "CONTAINED IN"
            expression = self.expression(
                exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise)
            )
        elif self._match(TokenType.ALL):
            kind = "ALL"
            expression = None
        else:
            # Default/fallback: AS OF <point in time>.
            self._match_text_seq("AS", "OF")
            kind = "AS OF"
            expression = self._parse_type()

        return self.expression(exp.Version, this=this, expression=expression, kind=kind)
expression=expression, kind=kind) 2795 2796 def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]: 2797 if not self._match(TokenType.UNNEST): 2798 return None 2799 2800 expressions = self._parse_wrapped_csv(self._parse_type) 2801 offset = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 2802 2803 alias = self._parse_table_alias() if with_alias else None 2804 2805 if alias: 2806 if self.UNNEST_COLUMN_ONLY: 2807 if alias.args.get("columns"): 2808 self.raise_error("Unexpected extra column alias in unnest.") 2809 2810 alias.set("columns", [alias.this]) 2811 alias.set("this", None) 2812 2813 columns = alias.args.get("columns") or [] 2814 if offset and len(expressions) < len(columns): 2815 offset = columns.pop() 2816 2817 if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET): 2818 self._match(TokenType.ALIAS) 2819 offset = self._parse_id_var( 2820 any_token=False, tokens=self.UNNEST_OFFSET_ALIAS_TOKENS 2821 ) or exp.to_identifier("offset") 2822 2823 return self.expression(exp.Unnest, expressions=expressions, alias=alias, offset=offset) 2824 2825 def _parse_derived_table_values(self) -> t.Optional[exp.Values]: 2826 is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES) 2827 if not is_derived and not self._match(TokenType.VALUES): 2828 return None 2829 2830 expressions = self._parse_csv(self._parse_value) 2831 alias = self._parse_table_alias() 2832 2833 if is_derived: 2834 self._match_r_paren() 2835 2836 return self.expression( 2837 exp.Values, expressions=expressions, alias=alias or self._parse_table_alias() 2838 ) 2839 2840 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 2841 if not self._match(TokenType.TABLE_SAMPLE) and not ( 2842 as_modifier and self._match_text_seq("USING", "SAMPLE") 2843 ): 2844 return None 2845 2846 bucket_numerator = None 2847 bucket_denominator = None 2848 bucket_field = None 2849 percent = None 2850 rows = None 2851 size = None 2852 seed = None 2853 
2854 kind = ( 2855 self._prev.text if self._prev.token_type == TokenType.TABLE_SAMPLE else "USING SAMPLE" 2856 ) 2857 method = self._parse_var(tokens=(TokenType.ROW,)) 2858 2859 matched_l_paren = self._match(TokenType.L_PAREN) 2860 2861 if self.TABLESAMPLE_CSV: 2862 num = None 2863 expressions = self._parse_csv(self._parse_primary) 2864 else: 2865 expressions = None 2866 num = ( 2867 self._parse_factor() 2868 if self._match(TokenType.NUMBER, advance=False) 2869 else self._parse_primary() 2870 ) 2871 2872 if self._match_text_seq("BUCKET"): 2873 bucket_numerator = self._parse_number() 2874 self._match_text_seq("OUT", "OF") 2875 bucket_denominator = bucket_denominator = self._parse_number() 2876 self._match(TokenType.ON) 2877 bucket_field = self._parse_field() 2878 elif self._match_set((TokenType.PERCENT, TokenType.MOD)): 2879 percent = num 2880 elif self._match(TokenType.ROWS): 2881 rows = num 2882 elif num: 2883 size = num 2884 2885 if matched_l_paren: 2886 self._match_r_paren() 2887 2888 if self._match(TokenType.L_PAREN): 2889 method = self._parse_var() 2890 seed = self._match(TokenType.COMMA) and self._parse_number() 2891 self._match_r_paren() 2892 elif self._match_texts(("SEED", "REPEATABLE")): 2893 seed = self._parse_wrapped(self._parse_number) 2894 2895 return self.expression( 2896 exp.TableSample, 2897 expressions=expressions, 2898 method=method, 2899 bucket_numerator=bucket_numerator, 2900 bucket_denominator=bucket_denominator, 2901 bucket_field=bucket_field, 2902 percent=percent, 2903 rows=rows, 2904 size=size, 2905 seed=seed, 2906 kind=kind, 2907 ) 2908 2909 def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]: 2910 return list(iter(self._parse_pivot, None)) or None 2911 2912 def _parse_joins(self) -> t.Optional[t.List[exp.Join]]: 2913 return list(iter(self._parse_join, None)) or None 2914 2915 # https://duckdb.org/docs/sql/statements/pivot 2916 def _parse_simplified_pivot(self) -> exp.Pivot: 2917 def _parse_on() -> t.Optional[exp.Expression]: 2918 this 
    def _parse_pivot(self) -> t.Optional[exp.Pivot]:
        """Parse a parenthesized PIVOT/UNPIVOT clause into an exp.Pivot.

        Rewinds and returns None if no opening paren follows the keyword; for
        PIVOT (not UNPIVOT) the resulting output column names are computed and
        attached to the node.
        """
        index = self._index
        include_nulls = None

        if self._match(TokenType.PIVOT):
            unpivot = False
        elif self._match(TokenType.UNPIVOT):
            unpivot = True

            # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax
            if self._match_text_seq("INCLUDE", "NULLS"):
                include_nulls = True
            elif self._match_text_seq("EXCLUDE", "NULLS"):
                include_nulls = False
        else:
            return None

        expressions = []
        field = None

        if not self._match(TokenType.L_PAREN):
            self._retreat(index)
            return None

        if unpivot:
            expressions = self._parse_csv(self._parse_column)
        else:
            expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function()))

        if not expressions:
            self.raise_error("Failed to parse PIVOT's aggregation list")

        if not self._match(TokenType.FOR):
            self.raise_error("Expecting FOR")

        value = self._parse_column()

        if not self._match(TokenType.IN):
            self.raise_error("Expecting IN")

        field = self._parse_in(value, alias=True)

        self._match_r_paren()

        pivot = self.expression(
            exp.Pivot,
            expressions=expressions,
            field=field,
            unpivot=unpivot,
            include_nulls=include_nulls,
        )

        # Only the last pivot in a chain may carry an alias.
        if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False):
            pivot.set("alias", self._parse_table_alias())

        if not unpivot:
            names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions))

            # Cross product of IN values and aggregation names yields the
            # pivoted output columns; prefix order is dialect-dependent.
            columns: t.List[exp.Expression] = []
            for fld in pivot.args["field"].expressions:
                field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name
                for name in names:
                    if self.PREFIXED_PIVOT_COLUMNS:
                        name = f"{name}_{field_name}" if name else field_name
                    else:
                        name = f"{field_name}_{name}" if name else field_name

                    columns.append(exp.to_identifier(name))

            pivot.set("columns", columns)

        return pivot

    def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]:
        """Return the alias of each pivot aggregation (dialects may override)."""
        return [agg.alias for agg in aggregations]

    def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]:
        """Parse a WHERE clause; `skip_where_token` assumes WHERE was consumed."""
        if not skip_where_token and not self._match(TokenType.WHERE):
            return None

        return self.expression(
            exp.Where, comments=self._prev_comments, this=self._parse_conjunction()
        )

    def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]:
        """Parse a GROUP BY clause, accumulating plain expressions, GROUPING
        SETS, ROLLUP, CUBE and WITH TOTALS until no more grouping constructs
        follow."""
        if not skip_group_by_token and not self._match(TokenType.GROUP_BY):
            return None

        elements = defaultdict(list)

        if self._match(TokenType.ALL):
            return self.expression(exp.Group, all=True)

        while True:
            expressions = self._parse_csv(self._parse_conjunction)
            if expressions:
                elements["expressions"].extend(expressions)

            grouping_sets = self._parse_grouping_sets()
            if grouping_sets:
                elements["grouping_sets"].extend(grouping_sets)

            rollup = None
            cube = None
            totals = None

            index = self._index
            with_ = self._match(TokenType.WITH)
            if self._match(TokenType.ROLLUP):
                # `WITH ROLLUP` stores True; `ROLLUP (...)` stores the columns.
                rollup = with_ or self._parse_wrapped_csv(self._parse_column)
                elements["rollup"].extend(ensure_list(rollup))

            if self._match(TokenType.CUBE):
                cube = with_ or self._parse_wrapped_csv(self._parse_column)
                elements["cube"].extend(ensure_list(cube))

            if self._match_text_seq("TOTALS"):
                totals = True
                elements["totals"] = True  # type: ignore

            if not (grouping_sets or rollup or cube or totals):
                if with_:
                    # A dangling WITH belongs to a later clause -- rewind it.
                    self._retreat(index)
                break

        return self.expression(exp.Group, **elements)  # type: ignore
self._parse_wrapped_csv(self._parse_column) 3045 elements["cube"].extend(ensure_list(cube)) 3046 3047 if self._match_text_seq("TOTALS"): 3048 totals = True 3049 elements["totals"] = True # type: ignore 3050 3051 if not (grouping_sets or rollup or cube or totals): 3052 if with_: 3053 self._retreat(index) 3054 break 3055 3056 return self.expression(exp.Group, **elements) # type: ignore 3057 3058 def _parse_grouping_sets(self) -> t.Optional[t.List[exp.Expression]]: 3059 if not self._match(TokenType.GROUPING_SETS): 3060 return None 3061 3062 return self._parse_wrapped_csv(self._parse_grouping_set) 3063 3064 def _parse_grouping_set(self) -> t.Optional[exp.Expression]: 3065 if self._match(TokenType.L_PAREN): 3066 grouping_set = self._parse_csv(self._parse_column) 3067 self._match_r_paren() 3068 return self.expression(exp.Tuple, expressions=grouping_set) 3069 3070 return self._parse_column() 3071 3072 def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]: 3073 if not skip_having_token and not self._match(TokenType.HAVING): 3074 return None 3075 return self.expression(exp.Having, this=self._parse_conjunction()) 3076 3077 def _parse_qualify(self) -> t.Optional[exp.Qualify]: 3078 if not self._match(TokenType.QUALIFY): 3079 return None 3080 return self.expression(exp.Qualify, this=self._parse_conjunction()) 3081 3082 def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]: 3083 if skip_start_token: 3084 start = None 3085 elif self._match(TokenType.START_WITH): 3086 start = self._parse_conjunction() 3087 else: 3088 return None 3089 3090 self._match(TokenType.CONNECT_BY) 3091 self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression( 3092 exp.Prior, this=self._parse_bitwise() 3093 ) 3094 connect = self._parse_conjunction() 3095 self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR") 3096 3097 if not start and self._match(TokenType.START_WITH): 3098 start = self._parse_conjunction() 3099 3100 return 
self.expression(exp.Connect, start=start, connect=connect) 3101 3102 def _parse_order( 3103 self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False 3104 ) -> t.Optional[exp.Expression]: 3105 if not skip_order_token and not self._match(TokenType.ORDER_BY): 3106 return this 3107 3108 return self.expression( 3109 exp.Order, this=this, expressions=self._parse_csv(self._parse_ordered) 3110 ) 3111 3112 def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]: 3113 if not self._match(token): 3114 return None 3115 return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered)) 3116 3117 def _parse_ordered(self, parse_method: t.Optional[t.Callable] = None) -> exp.Ordered: 3118 this = parse_method() if parse_method else self._parse_conjunction() 3119 3120 asc = self._match(TokenType.ASC) 3121 desc = self._match(TokenType.DESC) or (asc and False) 3122 3123 is_nulls_first = self._match_text_seq("NULLS", "FIRST") 3124 is_nulls_last = self._match_text_seq("NULLS", "LAST") 3125 3126 nulls_first = is_nulls_first or False 3127 explicitly_null_ordered = is_nulls_first or is_nulls_last 3128 3129 if ( 3130 not explicitly_null_ordered 3131 and ( 3132 (not desc and self.NULL_ORDERING == "nulls_are_small") 3133 or (desc and self.NULL_ORDERING != "nulls_are_small") 3134 ) 3135 and self.NULL_ORDERING != "nulls_are_last" 3136 ): 3137 nulls_first = True 3138 3139 return self.expression(exp.Ordered, this=this, desc=desc, nulls_first=nulls_first) 3140 3141 def _parse_limit( 3142 self, this: t.Optional[exp.Expression] = None, top: bool = False 3143 ) -> t.Optional[exp.Expression]: 3144 if self._match(TokenType.TOP if top else TokenType.LIMIT): 3145 comments = self._prev_comments 3146 if top: 3147 limit_paren = self._match(TokenType.L_PAREN) 3148 expression = self._parse_number() 3149 3150 if limit_paren: 3151 self._match_r_paren() 3152 else: 3153 expression = self._parse_term() 3154 3155 if self._match(TokenType.COMMA): 3156 offset 
= expression 3157 expression = self._parse_term() 3158 else: 3159 offset = None 3160 3161 limit_exp = self.expression( 3162 exp.Limit, this=this, expression=expression, offset=offset, comments=comments 3163 ) 3164 3165 return limit_exp 3166 3167 if self._match(TokenType.FETCH): 3168 direction = self._match_set((TokenType.FIRST, TokenType.NEXT)) 3169 direction = self._prev.text if direction else "FIRST" 3170 3171 count = self._parse_field(tokens=self.FETCH_TOKENS) 3172 percent = self._match(TokenType.PERCENT) 3173 3174 self._match_set((TokenType.ROW, TokenType.ROWS)) 3175 3176 only = self._match_text_seq("ONLY") 3177 with_ties = self._match_text_seq("WITH", "TIES") 3178 3179 if only and with_ties: 3180 self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause") 3181 3182 return self.expression( 3183 exp.Fetch, 3184 direction=direction, 3185 count=count, 3186 percent=percent, 3187 with_ties=with_ties, 3188 ) 3189 3190 return this 3191 3192 def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 3193 if not self._match(TokenType.OFFSET): 3194 return this 3195 3196 count = self._parse_term() 3197 self._match_set((TokenType.ROW, TokenType.ROWS)) 3198 return self.expression(exp.Offset, this=this, expression=count) 3199 3200 def _parse_locks(self) -> t.List[exp.Lock]: 3201 locks = [] 3202 while True: 3203 if self._match_text_seq("FOR", "UPDATE"): 3204 update = True 3205 elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq( 3206 "LOCK", "IN", "SHARE", "MODE" 3207 ): 3208 update = False 3209 else: 3210 break 3211 3212 expressions = None 3213 if self._match_text_seq("OF"): 3214 expressions = self._parse_csv(lambda: self._parse_table(schema=True)) 3215 3216 wait: t.Optional[bool | exp.Expression] = None 3217 if self._match_text_seq("NOWAIT"): 3218 wait = True 3219 elif self._match_text_seq("WAIT"): 3220 wait = self._parse_primary() 3221 elif self._match_text_seq("SKIP", "LOCKED"): 3222 wait = False 3223 3224 
locks.append( 3225 self.expression(exp.Lock, update=update, expressions=expressions, wait=wait) 3226 ) 3227 3228 return locks 3229 3230 def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3231 if not self._match_set(self.SET_OPERATIONS): 3232 return this 3233 3234 token_type = self._prev.token_type 3235 3236 if token_type == TokenType.UNION: 3237 expression = exp.Union 3238 elif token_type == TokenType.EXCEPT: 3239 expression = exp.Except 3240 else: 3241 expression = exp.Intersect 3242 3243 return self.expression( 3244 expression, 3245 comments=self._prev.comments, 3246 this=this, 3247 distinct=self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL), 3248 by_name=self._match_text_seq("BY", "NAME"), 3249 expression=self._parse_set_operations(self._parse_select(nested=True)), 3250 ) 3251 3252 def _parse_expression(self) -> t.Optional[exp.Expression]: 3253 return self._parse_alias(self._parse_conjunction()) 3254 3255 def _parse_conjunction(self) -> t.Optional[exp.Expression]: 3256 return self._parse_tokens(self._parse_equality, self.CONJUNCTION) 3257 3258 def _parse_equality(self) -> t.Optional[exp.Expression]: 3259 return self._parse_tokens(self._parse_comparison, self.EQUALITY) 3260 3261 def _parse_comparison(self) -> t.Optional[exp.Expression]: 3262 return self._parse_tokens(self._parse_range, self.COMPARISON) 3263 3264 def _parse_range(self) -> t.Optional[exp.Expression]: 3265 this = self._parse_bitwise() 3266 negate = self._match(TokenType.NOT) 3267 3268 if self._match_set(self.RANGE_PARSERS): 3269 expression = self.RANGE_PARSERS[self._prev.token_type](self, this) 3270 if not expression: 3271 return this 3272 3273 this = expression 3274 elif self._match(TokenType.ISNULL): 3275 this = self.expression(exp.Is, this=this, expression=exp.Null()) 3276 3277 # Postgres supports ISNULL and NOTNULL for conditions. 
    def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse the tail of an IS predicate: [NOT] DISTINCT FROM / NULL / TRUE / FALSE.

        Rewinds (including the already-consumed IS token) and returns None if
        nothing recognizable follows.
        """
        index = self._index - 1
        negate = self._match(TokenType.NOT)

        if self._match_text_seq("DISTINCT", "FROM"):
            klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ
            return self.expression(klass, this=this, expression=self._parse_conjunction())

        expression = self._parse_null() or self._parse_boolean()
        if not expression:
            self._retreat(index)
            return None

        this = self.expression(exp.Is, this=this, expression=expression)
        return self.expression(exp.Not, this=this) if negate else this

    def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In:
        """Parse the right-hand side of an IN predicate: UNNEST(...), a
        parenthesized subquery/expression list, or a bare field."""
        unnest = self._parse_unnest(with_alias=False)
        if unnest:
            this = self.expression(exp.In, this=this, unnest=unnest)
        elif self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias))

            # A single subquery is stored in `query`, otherwise an expression list.
            if len(expressions) == 1 and isinstance(expressions[0], exp.Subqueryable):
                this = self.expression(exp.In, this=this, query=expressions[0])
            else:
                this = self.expression(exp.In, this=this, expressions=expressions)

            self._match_r_paren(this)
        else:
            this = self.expression(exp.In, this=this, field=self._parse_field())

        return this

    def _parse_between(self, this: exp.Expression) -> exp.Between:
        """Parse `BETWEEN low AND high` (BETWEEN already consumed)."""
        low = self._parse_bitwise()
        self._match(TokenType.AND)
        high = self._parse_bitwise()
        return self.expression(exp.Between, this=this, low=low, high=high)

    def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse an optional `ESCAPE '<char>'` suffix (e.g. after LIKE)."""
        if not self._match(TokenType.ESCAPE):
            return this
        return self.expression(exp.Escape, this=this, expression=self._parse_string())

    def _parse_interval(self) -> t.Optional[exp.Interval]:
        """Parse an INTERVAL literal, normalizing toward `INTERVAL '<n>' <unit>`.

        Rewinds and returns None when no value follows the INTERVAL keyword.
        """
        index = self._index

        if not self._match(TokenType.INTERVAL):
            return None

        if self._match(TokenType.STRING, advance=False):
            this = self._parse_primary()
        else:
            this = self._parse_term()

        if not this:
            self._retreat(index)
            return None

        unit = self._parse_function() or self._parse_var(any_token=True)

        # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse
        # each INTERVAL expression into this canonical form so it's easy to transpile
        if this and this.is_number:
            this = exp.Literal.string(this.name)
        elif this and this.is_string:
            parts = this.name.split()

            if len(parts) == 2:
                if unit:
                    # This is not actually a unit, it's something else (e.g. a "window side")
                    unit = None
                    self._retreat(self._index - 1)

                # Split a combined `'5 day'` string into value + unit.
                this = exp.Literal.string(parts[0])
                unit = self.expression(exp.Var, this=parts[1])

        return self.expression(exp.Interval, this=this, unit=unit)
    def _parse_bitwise(self) -> t.Optional[exp.Expression]:
        """Parse left-associative bitwise operators, `??` (coalesce), and
        `<<` / `>>` shifts (tokenized as LT LT / GT GT pairs)."""
        this = self._parse_term()

        while True:
            if self._match_set(self.BITWISE):
                this = self.expression(
                    self.BITWISE[self._prev.token_type],
                    this=this,
                    expression=self._parse_term(),
                )
            elif self._match(TokenType.DQMARK):
                this = self.expression(exp.Coalesce, this=this, expressions=self._parse_term())
            elif self._match_pair(TokenType.LT, TokenType.LT):
                this = self.expression(
                    exp.BitwiseLeftShift, this=this, expression=self._parse_term()
                )
            elif self._match_pair(TokenType.GT, TokenType.GT):
                this = self.expression(
                    exp.BitwiseRightShift, this=this, expression=self._parse_term()
                )
            else:
                break

        return this

    def _parse_term(self) -> t.Optional[exp.Expression]:
        """Parse additive-level operators (TERM token set) over factors."""
        return self._parse_tokens(self._parse_factor, self.TERM)

    def _parse_factor(self) -> t.Optional[exp.Expression]:
        """Parse multiplicative-level operators, with exponentiation below them
        when the dialect defines EXPONENT tokens."""
        if self.EXPONENT:
            return self._parse_tokens(self._parse_exponent, self.FACTOR)
        return self._parse_tokens(self._parse_unary, self.FACTOR)

    def _parse_exponent(self) -> t.Optional[exp.Expression]:
        """Parse exponentiation operators over unary expressions."""
        return self._parse_tokens(self._parse_unary, self.EXPONENT)

    def _parse_unary(self) -> t.Optional[exp.Expression]:
        """Parse a unary prefix operator or fall through to a typed expression."""
        if self._match_set(self.UNARY_PARSERS):
            return self.UNARY_PARSERS[self._prev.token_type](self)
        return self._parse_at_time_zone(self._parse_type())

    def _parse_type(self, parse_interval: bool = True) -> t.Optional[exp.Expression]:
        """Parse an interval, a cast-style `<type> <literal>` construct, or a column.

        A data type followed by a literal becomes a Cast (or a dialect-specific
        type-literal node); a bare parameterless type that isn't followed by a
        literal is re-parsed as a column.
        """
        interval = parse_interval and self._parse_interval()
        if interval:
            return interval

        index = self._index
        data_type = self._parse_types(check_func=True, allow_identifiers=False)
        this = self._parse_column()

        if data_type:
            if isinstance(this, exp.Literal):
                parser = self.TYPE_LITERAL_PARSERS.get(data_type.this)
                if parser:
                    return parser(self, this, data_type)
                return self.expression(exp.Cast, this=this, to=data_type)
            if not data_type.expressions:
                # Ambiguous bare type name; rewind and treat it as a column.
                self._retreat(index)
                return self._parse_column()
            return self._parse_column_ops(data_type)

        return this and self._parse_column_ops(this)
    def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]:
        """Parse one data type parameter, e.g. the `10` in DECIMAL(10, 2),
        with an optional trailing modifier variable."""
        this = self._parse_type()
        if not this:
            return None

        return self.expression(
            exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True)
        )

    def _parse_types(
        self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True
    ) -> t.Optional[exp.Expression]:
        """Parse a data type into an exp.DataType (or related node).

        Handles parameterized types `T(...)`, nested types `T<...>`, struct and
        enum bodies, timestamp/time zone modifiers, INTERVAL spans, UNSIGNED
        integer variants, trailing `[]` array suffixes, and user-defined types.

        Args:
            check_func: rewind when the parse could equally be a function call
                (a string literal immediately follows the parenthesized args).
            schema: parsing inside a schema definition; forwarded to nested parses.
            allow_identifiers: allow a plain identifier to be re-tokenized as a
                type name (and, if supported, as a user-defined type).
        """
        index = self._index

        prefix = self._match_text_seq("SYSUDTLIB", ".")

        if not self._match_set(self.TYPE_TOKENS):
            identifier = allow_identifiers and self._parse_id_var(
                any_token=False, tokens=(TokenType.VAR,)
            )

            if identifier:
                # Re-tokenize the identifier: it may be a type name that was
                # lexed as a plain VAR (e.g. quoted or case-odd spellings).
                tokens = self._tokenizer.tokenize(identifier.name)

                if len(tokens) != 1:
                    self.raise_error("Unexpected identifier", self._prev)

                if tokens[0].token_type in self.TYPE_TOKENS:
                    self._prev = tokens[0]
                elif self.SUPPORTS_USER_DEFINED_TYPES:
                    type_name = identifier.name

                    while self._match(TokenType.DOT):
                        type_name = f"{type_name}.{self._advance_any() and self._prev.text}"

                    return exp.DataType.build(type_name, udt=True)
                else:
                    return None
            else:
                return None

        type_token = self._prev.token_type

        if type_token == TokenType.PSEUDO_TYPE:
            return self.expression(exp.PseudoType, this=self._prev.text)

        if type_token == TokenType.OBJECT_IDENTIFIER:
            return self.expression(exp.ObjectIdentifier, this=self._prev.text)

        nested = type_token in self.NESTED_TYPE_TOKENS
        is_struct = type_token in self.STRUCT_TYPE_TOKENS
        expressions = None
        maybe_func = False

        if self._match(TokenType.L_PAREN):
            if is_struct:
                expressions = self._parse_csv(self._parse_struct_types)
            elif nested:
                expressions = self._parse_csv(
                    lambda: self._parse_types(
                        check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                    )
                )
            elif type_token in self.ENUM_TYPE_TOKENS:
                expressions = self._parse_csv(self._parse_equality)
            else:
                expressions = self._parse_csv(self._parse_type_size)

            if not expressions or not self._match(TokenType.R_PAREN):
                self._retreat(index)
                return None

            # `TYPE(...)` might actually be a function call; decided below.
            maybe_func = True

        this: t.Optional[exp.Expression] = None
        values: t.Optional[t.List[exp.Expression]] = None

        if nested and self._match(TokenType.LT):
            if is_struct:
                expressions = self._parse_csv(self._parse_struct_types)
            else:
                expressions = self._parse_csv(
                    lambda: self._parse_types(
                        check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                    )
                )

            if not self._match(TokenType.GT):
                self.raise_error("Expecting >")

            if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)):
                values = self._parse_csv(self._parse_conjunction)
                self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN))

        if type_token in self.TIMESTAMPS:
            if self._match_text_seq("WITH", "TIME", "ZONE"):
                maybe_func = False
                tz_type = (
                    exp.DataType.Type.TIMETZ
                    if type_token in self.TIMES
                    else exp.DataType.Type.TIMESTAMPTZ
                )
                this = exp.DataType(this=tz_type, expressions=expressions)
            elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"):
                maybe_func = False
                this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions)
            elif self._match_text_seq("WITHOUT", "TIME", "ZONE"):
                maybe_func = False
        elif type_token == TokenType.INTERVAL:
            unit = self._parse_var()

            if self._match_text_seq("TO"):
                span = [exp.IntervalSpan(this=unit, expression=self._parse_var())]
            else:
                span = None

            if span or not unit:
                this = self.expression(
                    exp.DataType, this=exp.DataType.Type.INTERVAL, expressions=span
                )
            else:
                this = self.expression(exp.Interval, unit=unit)

        if maybe_func and check_func:
            # A string literal right after `TYPE(...)` means this was a
            # function call, not a type -- rewind the whole parse.
            index2 = self._index
            peek = self._parse_string()

            if not peek:
                self._retreat(index)
                return None

            self._retreat(index2)

        if not this:
            if self._match_text_seq("UNSIGNED"):
                unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token)
                if not unsigned_type_token:
                    self.raise_error(f"Cannot convert {type_token.value} to unsigned.")

                type_token = unsigned_type_token or type_token

            this = exp.DataType(
                this=exp.DataType.Type[type_token.value],
                expressions=expressions,
                nested=nested,
                values=values,
                prefix=prefix,
            )

        # Trailing `[]` suffixes wrap the type in ARRAY, once per pair.
        while self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET):
            this = exp.DataType(this=exp.DataType.Type.ARRAY, expressions=[this], nested=True)

        return this

    def _parse_struct_types(self) -> t.Optional[exp.Expression]:
        """Parse one struct field: `name: type` or `name type` as a column def."""
        this = self._parse_type(parse_interval=False) or self._parse_id_var()
        self._match(TokenType.COLON)
        return self._parse_column_def(this)

    def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse an optional `AT TIME ZONE <zone>` suffix onto `this`."""
        if not self._match_text_seq("AT", "TIME", "ZONE"):
            return this
        return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary())

    def _parse_column(self) -> t.Optional[exp.Expression]:
        """Parse a column reference: a field, promoted to exp.Column when it is
        a bare identifier, then any trailing column operators/brackets."""
        this = self._parse_field()
        if isinstance(this, exp.Identifier):
            this = self.expression(exp.Column, this=this)
        elif not this:
            return self._parse_bracket(this)
        return self._parse_column_ops(this)
self.expression(exp.Column, this=this) 3602 elif not this: 3603 return self._parse_bracket(this) 3604 return self._parse_column_ops(this) 3605 3606 def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3607 this = self._parse_bracket(this) 3608 3609 while self._match_set(self.COLUMN_OPERATORS): 3610 op_token = self._prev.token_type 3611 op = self.COLUMN_OPERATORS.get(op_token) 3612 3613 if op_token == TokenType.DCOLON: 3614 field = self._parse_types() 3615 if not field: 3616 self.raise_error("Expected type") 3617 elif op and self._curr: 3618 self._advance() 3619 value = self._prev.text 3620 field = ( 3621 exp.Literal.number(value) 3622 if self._prev.token_type == TokenType.NUMBER 3623 else exp.Literal.string(value) 3624 ) 3625 else: 3626 field = self._parse_field(anonymous_func=True, any_token=True) 3627 3628 if isinstance(field, exp.Func): 3629 # bigquery allows function calls like x.y.count(...) 3630 # SAFE.SUBSTR(...) 3631 # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules 3632 this = self._replace_columns_with_dots(this) 3633 3634 if op: 3635 this = op(self, this, field) 3636 elif isinstance(this, exp.Column) and not this.args.get("catalog"): 3637 this = self.expression( 3638 exp.Column, 3639 this=field, 3640 table=this.this, 3641 db=this.args.get("table"), 3642 catalog=this.args.get("db"), 3643 ) 3644 else: 3645 this = self.expression(exp.Dot, this=this, expression=field) 3646 this = self._parse_bracket(this) 3647 return this 3648 3649 def _parse_primary(self) -> t.Optional[exp.Expression]: 3650 if self._match_set(self.PRIMARY_PARSERS): 3651 token_type = self._prev.token_type 3652 primary = self.PRIMARY_PARSERS[token_type](self, self._prev) 3653 3654 if token_type == TokenType.STRING: 3655 expressions = [primary] 3656 while self._match(TokenType.STRING): 3657 expressions.append(exp.Literal.string(self._prev.text)) 3658 3659 if len(expressions) > 1: 3660 return 
self.expression(exp.Concat, expressions=expressions) 3661 3662 return primary 3663 3664 if self._match_pair(TokenType.DOT, TokenType.NUMBER): 3665 return exp.Literal.number(f"0.{self._prev.text}") 3666 3667 if self._match(TokenType.L_PAREN): 3668 comments = self._prev_comments 3669 query = self._parse_select() 3670 3671 if query: 3672 expressions = [query] 3673 else: 3674 expressions = self._parse_expressions() 3675 3676 this = self._parse_query_modifiers(seq_get(expressions, 0)) 3677 3678 if isinstance(this, exp.Subqueryable): 3679 this = self._parse_set_operations( 3680 self._parse_subquery(this=this, parse_alias=False) 3681 ) 3682 elif len(expressions) > 1: 3683 this = self.expression(exp.Tuple, expressions=expressions) 3684 else: 3685 this = self.expression(exp.Paren, this=self._parse_set_operations(this)) 3686 3687 if this: 3688 this.add_comments(comments) 3689 3690 self._match_r_paren(expression=this) 3691 return this 3692 3693 return None 3694 3695 def _parse_field( 3696 self, 3697 any_token: bool = False, 3698 tokens: t.Optional[t.Collection[TokenType]] = None, 3699 anonymous_func: bool = False, 3700 ) -> t.Optional[exp.Expression]: 3701 return ( 3702 self._parse_primary() 3703 or self._parse_function(anonymous=anonymous_func) 3704 or self._parse_id_var(any_token=any_token, tokens=tokens) 3705 ) 3706 3707 def _parse_function( 3708 self, 3709 functions: t.Optional[t.Dict[str, t.Callable]] = None, 3710 anonymous: bool = False, 3711 optional_parens: bool = True, 3712 ) -> t.Optional[exp.Expression]: 3713 # This allows us to also parse {fn <function>} syntax (Snowflake, MySQL support this) 3714 # See: https://community.snowflake.com/s/article/SQL-Escape-Sequences 3715 fn_syntax = False 3716 if ( 3717 self._match(TokenType.L_BRACE, advance=False) 3718 and self._next 3719 and self._next.text.upper() == "FN" 3720 ): 3721 self._advance(2) 3722 fn_syntax = True 3723 3724 func = self._parse_function_call( 3725 functions=functions, anonymous=anonymous, 
optional_parens=optional_parens 3726 ) 3727 3728 if fn_syntax: 3729 self._match(TokenType.R_BRACE) 3730 3731 return func 3732 3733 def _parse_function_call( 3734 self, 3735 functions: t.Optional[t.Dict[str, t.Callable]] = None, 3736 anonymous: bool = False, 3737 optional_parens: bool = True, 3738 ) -> t.Optional[exp.Expression]: 3739 if not self._curr: 3740 return None 3741 3742 token_type = self._curr.token_type 3743 this = self._curr.text 3744 upper = this.upper() 3745 3746 parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper) 3747 if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS: 3748 self._advance() 3749 return parser(self) 3750 3751 if not self._next or self._next.token_type != TokenType.L_PAREN: 3752 if optional_parens and token_type in self.NO_PAREN_FUNCTIONS: 3753 self._advance() 3754 return self.expression(self.NO_PAREN_FUNCTIONS[token_type]) 3755 3756 return None 3757 3758 if token_type not in self.FUNC_TOKENS: 3759 return None 3760 3761 self._advance(2) 3762 3763 parser = self.FUNCTION_PARSERS.get(upper) 3764 if parser and not anonymous: 3765 this = parser(self) 3766 else: 3767 subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type) 3768 3769 if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH): 3770 this = self.expression(subquery_predicate, this=self._parse_select()) 3771 self._match_r_paren() 3772 return this 3773 3774 if functions is None: 3775 functions = self.FUNCTIONS 3776 3777 function = functions.get(upper) 3778 3779 alias = upper in self.FUNCTIONS_WITH_ALIASED_ARGS 3780 args = self._parse_csv(lambda: self._parse_lambda(alias=alias)) 3781 3782 if function and not anonymous: 3783 func = self.validate_expression(function(args), args) 3784 if not self.NORMALIZE_FUNCTIONS: 3785 func.meta["name"] = this 3786 this = func 3787 else: 3788 this = self.expression(exp.Anonymous, this=this, expressions=args) 3789 3790 self._match_r_paren(this) 3791 return self._parse_window(this) 3792 3793 
def _parse_function_parameter(self) -> t.Optional[exp.Expression]: 3794 return self._parse_column_def(self._parse_id_var()) 3795 3796 def _parse_user_defined_function( 3797 self, kind: t.Optional[TokenType] = None 3798 ) -> t.Optional[exp.Expression]: 3799 this = self._parse_id_var() 3800 3801 while self._match(TokenType.DOT): 3802 this = self.expression(exp.Dot, this=this, expression=self._parse_id_var()) 3803 3804 if not self._match(TokenType.L_PAREN): 3805 return this 3806 3807 expressions = self._parse_csv(self._parse_function_parameter) 3808 self._match_r_paren() 3809 return self.expression( 3810 exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True 3811 ) 3812 3813 def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier: 3814 literal = self._parse_primary() 3815 if literal: 3816 return self.expression(exp.Introducer, this=token.text, expression=literal) 3817 3818 return self.expression(exp.Identifier, this=token.text) 3819 3820 def _parse_session_parameter(self) -> exp.SessionParameter: 3821 kind = None 3822 this = self._parse_id_var() or self._parse_primary() 3823 3824 if this and self._match(TokenType.DOT): 3825 kind = this.name 3826 this = self._parse_var() or self._parse_primary() 3827 3828 return self.expression(exp.SessionParameter, this=this, kind=kind) 3829 3830 def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]: 3831 index = self._index 3832 3833 if self._match(TokenType.L_PAREN): 3834 expressions = t.cast( 3835 t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_id_var) 3836 ) 3837 3838 if not self._match(TokenType.R_PAREN): 3839 self._retreat(index) 3840 else: 3841 expressions = [self._parse_id_var()] 3842 3843 if self._match_set(self.LAMBDAS): 3844 return self.LAMBDAS[self._prev.token_type](self, expressions) 3845 3846 self._retreat(index) 3847 3848 this: t.Optional[exp.Expression] 3849 3850 if self._match(TokenType.DISTINCT): 3851 this = self.expression( 3852 
exp.Distinct, expressions=self._parse_csv(self._parse_conjunction) 3853 ) 3854 else: 3855 this = self._parse_select_or_expression(alias=alias) 3856 3857 return self._parse_limit(self._parse_order(self._parse_respect_or_ignore_nulls(this))) 3858 3859 def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 3860 index = self._index 3861 3862 if not self.errors: 3863 try: 3864 if self._parse_select(nested=True): 3865 return this 3866 except ParseError: 3867 pass 3868 finally: 3869 self.errors.clear() 3870 self._retreat(index) 3871 3872 if not self._match(TokenType.L_PAREN): 3873 return this 3874 3875 args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def()) 3876 3877 self._match_r_paren() 3878 return self.expression(exp.Schema, this=this, expressions=args) 3879 3880 def _parse_field_def(self) -> t.Optional[exp.Expression]: 3881 return self._parse_column_def(self._parse_field(any_token=True)) 3882 3883 def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3884 # column defs are not really columns, they're identifiers 3885 if isinstance(this, exp.Column): 3886 this = this.this 3887 3888 kind = self._parse_types(schema=True) 3889 3890 if self._match_text_seq("FOR", "ORDINALITY"): 3891 return self.expression(exp.ColumnDef, this=this, ordinality=True) 3892 3893 constraints: t.List[exp.Expression] = [] 3894 3895 if not kind and self._match(TokenType.ALIAS): 3896 constraints.append( 3897 self.expression( 3898 exp.ComputedColumnConstraint, 3899 this=self._parse_conjunction(), 3900 persisted=self._match_text_seq("PERSISTED"), 3901 not_null=self._match_pair(TokenType.NOT, TokenType.NULL), 3902 ) 3903 ) 3904 3905 while True: 3906 constraint = self._parse_column_constraint() 3907 if not constraint: 3908 break 3909 constraints.append(constraint) 3910 3911 if not kind and not constraints: 3912 return this 3913 3914 return self.expression(exp.ColumnDef, this=this, kind=kind, 
constraints=constraints) 3915 3916 def _parse_auto_increment( 3917 self, 3918 ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint: 3919 start = None 3920 increment = None 3921 3922 if self._match(TokenType.L_PAREN, advance=False): 3923 args = self._parse_wrapped_csv(self._parse_bitwise) 3924 start = seq_get(args, 0) 3925 increment = seq_get(args, 1) 3926 elif self._match_text_seq("START"): 3927 start = self._parse_bitwise() 3928 self._match_text_seq("INCREMENT") 3929 increment = self._parse_bitwise() 3930 3931 if start and increment: 3932 return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment) 3933 3934 return exp.AutoIncrementColumnConstraint() 3935 3936 def _parse_compress(self) -> exp.CompressColumnConstraint: 3937 if self._match(TokenType.L_PAREN, advance=False): 3938 return self.expression( 3939 exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise) 3940 ) 3941 3942 return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise()) 3943 3944 def _parse_generated_as_identity( 3945 self, 3946 ) -> ( 3947 exp.GeneratedAsIdentityColumnConstraint 3948 | exp.ComputedColumnConstraint 3949 | exp.GeneratedAsRowColumnConstraint 3950 ): 3951 if self._match_text_seq("BY", "DEFAULT"): 3952 on_null = self._match_pair(TokenType.ON, TokenType.NULL) 3953 this = self.expression( 3954 exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null 3955 ) 3956 else: 3957 self._match_text_seq("ALWAYS") 3958 this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True) 3959 3960 self._match(TokenType.ALIAS) 3961 3962 if self._match_text_seq("ROW"): 3963 start = self._match_text_seq("START") 3964 if not start: 3965 self._match(TokenType.END) 3966 hidden = self._match_text_seq("HIDDEN") 3967 return self.expression(exp.GeneratedAsRowColumnConstraint, start=start, hidden=hidden) 3968 3969 identity = self._match_text_seq("IDENTITY") 3970 3971 if self._match(TokenType.L_PAREN): 
3972 if self._match(TokenType.START_WITH): 3973 this.set("start", self._parse_bitwise()) 3974 if self._match_text_seq("INCREMENT", "BY"): 3975 this.set("increment", self._parse_bitwise()) 3976 if self._match_text_seq("MINVALUE"): 3977 this.set("minvalue", self._parse_bitwise()) 3978 if self._match_text_seq("MAXVALUE"): 3979 this.set("maxvalue", self._parse_bitwise()) 3980 3981 if self._match_text_seq("CYCLE"): 3982 this.set("cycle", True) 3983 elif self._match_text_seq("NO", "CYCLE"): 3984 this.set("cycle", False) 3985 3986 if not identity: 3987 this.set("expression", self._parse_bitwise()) 3988 elif not this.args.get("start") and self._match(TokenType.NUMBER, advance=False): 3989 args = self._parse_csv(self._parse_bitwise) 3990 this.set("start", seq_get(args, 0)) 3991 this.set("increment", seq_get(args, 1)) 3992 3993 self._match_r_paren() 3994 3995 return this 3996 3997 def _parse_inline(self) -> exp.InlineLengthColumnConstraint: 3998 self._match_text_seq("LENGTH") 3999 return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise()) 4000 4001 def _parse_not_constraint( 4002 self, 4003 ) -> t.Optional[exp.Expression]: 4004 if self._match_text_seq("NULL"): 4005 return self.expression(exp.NotNullColumnConstraint) 4006 if self._match_text_seq("CASESPECIFIC"): 4007 return self.expression(exp.CaseSpecificColumnConstraint, not_=True) 4008 if self._match_text_seq("FOR", "REPLICATION"): 4009 return self.expression(exp.NotForReplicationColumnConstraint) 4010 return None 4011 4012 def _parse_column_constraint(self) -> t.Optional[exp.Expression]: 4013 if self._match(TokenType.CONSTRAINT): 4014 this = self._parse_id_var() 4015 else: 4016 this = None 4017 4018 if self._match_texts(self.CONSTRAINT_PARSERS): 4019 return self.expression( 4020 exp.ColumnConstraint, 4021 this=this, 4022 kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self), 4023 ) 4024 4025 return this 4026 4027 def _parse_constraint(self) -> t.Optional[exp.Expression]: 4028 if not 
self._match(TokenType.CONSTRAINT): 4029 return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS) 4030 4031 this = self._parse_id_var() 4032 expressions = [] 4033 4034 while True: 4035 constraint = self._parse_unnamed_constraint() or self._parse_function() 4036 if not constraint: 4037 break 4038 expressions.append(constraint) 4039 4040 return self.expression(exp.Constraint, this=this, expressions=expressions) 4041 4042 def _parse_unnamed_constraint( 4043 self, constraints: t.Optional[t.Collection[str]] = None 4044 ) -> t.Optional[exp.Expression]: 4045 if self._match(TokenType.IDENTIFIER, advance=False) or not self._match_texts( 4046 constraints or self.CONSTRAINT_PARSERS 4047 ): 4048 return None 4049 4050 constraint = self._prev.text.upper() 4051 if constraint not in self.CONSTRAINT_PARSERS: 4052 self.raise_error(f"No parser found for schema constraint {constraint}.") 4053 4054 return self.CONSTRAINT_PARSERS[constraint](self) 4055 4056 def _parse_unique(self) -> exp.UniqueColumnConstraint: 4057 self._match_text_seq("KEY") 4058 return self.expression( 4059 exp.UniqueColumnConstraint, 4060 this=self._parse_schema(self._parse_id_var(any_token=False)), 4061 index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text, 4062 ) 4063 4064 def _parse_key_constraint_options(self) -> t.List[str]: 4065 options = [] 4066 while True: 4067 if not self._curr: 4068 break 4069 4070 if self._match(TokenType.ON): 4071 action = None 4072 on = self._advance_any() and self._prev.text 4073 4074 if self._match_text_seq("NO", "ACTION"): 4075 action = "NO ACTION" 4076 elif self._match_text_seq("CASCADE"): 4077 action = "CASCADE" 4078 elif self._match_text_seq("RESTRICT"): 4079 action = "RESTRICT" 4080 elif self._match_pair(TokenType.SET, TokenType.NULL): 4081 action = "SET NULL" 4082 elif self._match_pair(TokenType.SET, TokenType.DEFAULT): 4083 action = "SET DEFAULT" 4084 else: 4085 self.raise_error("Invalid key constraint") 4086 4087 
options.append(f"ON {on} {action}") 4088 elif self._match_text_seq("NOT", "ENFORCED"): 4089 options.append("NOT ENFORCED") 4090 elif self._match_text_seq("DEFERRABLE"): 4091 options.append("DEFERRABLE") 4092 elif self._match_text_seq("INITIALLY", "DEFERRED"): 4093 options.append("INITIALLY DEFERRED") 4094 elif self._match_text_seq("NORELY"): 4095 options.append("NORELY") 4096 elif self._match_text_seq("MATCH", "FULL"): 4097 options.append("MATCH FULL") 4098 else: 4099 break 4100 4101 return options 4102 4103 def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]: 4104 if match and not self._match(TokenType.REFERENCES): 4105 return None 4106 4107 expressions = None 4108 this = self._parse_table(schema=True) 4109 options = self._parse_key_constraint_options() 4110 return self.expression(exp.Reference, this=this, expressions=expressions, options=options) 4111 4112 def _parse_foreign_key(self) -> exp.ForeignKey: 4113 expressions = self._parse_wrapped_id_vars() 4114 reference = self._parse_references() 4115 options = {} 4116 4117 while self._match(TokenType.ON): 4118 if not self._match_set((TokenType.DELETE, TokenType.UPDATE)): 4119 self.raise_error("Expected DELETE or UPDATE") 4120 4121 kind = self._prev.text.lower() 4122 4123 if self._match_text_seq("NO", "ACTION"): 4124 action = "NO ACTION" 4125 elif self._match(TokenType.SET): 4126 self._match_set((TokenType.NULL, TokenType.DEFAULT)) 4127 action = "SET " + self._prev.text.upper() 4128 else: 4129 self._advance() 4130 action = self._prev.text.upper() 4131 4132 options[kind] = action 4133 4134 return self.expression( 4135 exp.ForeignKey, expressions=expressions, reference=reference, **options # type: ignore 4136 ) 4137 4138 def _parse_primary_key_part(self) -> t.Optional[exp.Expression]: 4139 return self._parse_field() 4140 4141 def _parse_period_for_system_time(self) -> exp.PeriodForSystemTimeConstraint: 4142 self._match(TokenType.TIMESTAMP_SNAPSHOT) 4143 4144 id_vars = 
self._parse_wrapped_id_vars() 4145 return self.expression( 4146 exp.PeriodForSystemTimeConstraint, 4147 this=seq_get(id_vars, 0), 4148 expression=seq_get(id_vars, 1), 4149 ) 4150 4151 def _parse_primary_key( 4152 self, wrapped_optional: bool = False, in_props: bool = False 4153 ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey: 4154 desc = ( 4155 self._match_set((TokenType.ASC, TokenType.DESC)) 4156 and self._prev.token_type == TokenType.DESC 4157 ) 4158 4159 if not in_props and not self._match(TokenType.L_PAREN, advance=False): 4160 return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc) 4161 4162 expressions = self._parse_wrapped_csv( 4163 self._parse_primary_key_part, optional=wrapped_optional 4164 ) 4165 options = self._parse_key_constraint_options() 4166 return self.expression(exp.PrimaryKey, expressions=expressions, options=options) 4167 4168 def _parse_bracket(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4169 if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)): 4170 return this 4171 4172 bracket_kind = self._prev.token_type 4173 4174 if self._match(TokenType.COLON): 4175 expressions: t.List[exp.Expression] = [ 4176 self.expression(exp.Slice, expression=self._parse_conjunction()) 4177 ] 4178 else: 4179 expressions = self._parse_csv( 4180 lambda: self._parse_slice( 4181 self._parse_alias(self._parse_conjunction(), explicit=True) 4182 ) 4183 ) 4184 4185 if not self._match(TokenType.R_BRACKET) and bracket_kind == TokenType.L_BRACKET: 4186 self.raise_error("Expected ]") 4187 elif not self._match(TokenType.R_BRACE) and bracket_kind == TokenType.L_BRACE: 4188 self.raise_error("Expected }") 4189 4190 # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs 4191 if bracket_kind == TokenType.L_BRACE: 4192 this = self.expression(exp.Struct, expressions=expressions) 4193 elif not this or this.name.upper() == "ARRAY": 4194 this = self.expression(exp.Array, expressions=expressions) 4195 else: 4196 
expressions = apply_index_offset(this, expressions, -self.INDEX_OFFSET) 4197 this = self.expression(exp.Bracket, this=this, expressions=expressions) 4198 4199 self._add_comments(this) 4200 return self._parse_bracket(this) 4201 4202 def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4203 if self._match(TokenType.COLON): 4204 return self.expression(exp.Slice, this=this, expression=self._parse_conjunction()) 4205 return this 4206 4207 def _parse_case(self) -> t.Optional[exp.Expression]: 4208 ifs = [] 4209 default = None 4210 4211 comments = self._prev_comments 4212 expression = self._parse_conjunction() 4213 4214 while self._match(TokenType.WHEN): 4215 this = self._parse_conjunction() 4216 self._match(TokenType.THEN) 4217 then = self._parse_conjunction() 4218 ifs.append(self.expression(exp.If, this=this, true=then)) 4219 4220 if self._match(TokenType.ELSE): 4221 default = self._parse_conjunction() 4222 4223 if not self._match(TokenType.END): 4224 self.raise_error("Expected END after CASE", self._prev) 4225 4226 return self._parse_window( 4227 self.expression(exp.Case, comments=comments, this=expression, ifs=ifs, default=default) 4228 ) 4229 4230 def _parse_if(self) -> t.Optional[exp.Expression]: 4231 if self._match(TokenType.L_PAREN): 4232 args = self._parse_csv(self._parse_conjunction) 4233 this = self.validate_expression(exp.If.from_arg_list(args), args) 4234 self._match_r_paren() 4235 else: 4236 index = self._index - 1 4237 condition = self._parse_conjunction() 4238 4239 if not condition: 4240 self._retreat(index) 4241 return None 4242 4243 self._match(TokenType.THEN) 4244 true = self._parse_conjunction() 4245 false = self._parse_conjunction() if self._match(TokenType.ELSE) else None 4246 self._match(TokenType.END) 4247 this = self.expression(exp.If, this=condition, true=true, false=false) 4248 4249 return self._parse_window(this) 4250 4251 def _parse_next_value_for(self) -> t.Optional[exp.Expression]: 4252 if not 
self._match_text_seq("VALUE", "FOR"): 4253 self._retreat(self._index - 1) 4254 return None 4255 4256 return self.expression( 4257 exp.NextValueFor, 4258 this=self._parse_column(), 4259 order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order), 4260 ) 4261 4262 def _parse_extract(self) -> exp.Extract: 4263 this = self._parse_function() or self._parse_var() or self._parse_type() 4264 4265 if self._match(TokenType.FROM): 4266 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 4267 4268 if not self._match(TokenType.COMMA): 4269 self.raise_error("Expected FROM or comma after EXTRACT", self._prev) 4270 4271 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 4272 4273 def _parse_any_value(self) -> exp.AnyValue: 4274 this = self._parse_lambda() 4275 is_max = None 4276 having = None 4277 4278 if self._match(TokenType.HAVING): 4279 self._match_texts(("MAX", "MIN")) 4280 is_max = self._prev.text == "MAX" 4281 having = self._parse_column() 4282 4283 return self.expression(exp.AnyValue, this=this, having=having, max=is_max) 4284 4285 def _parse_cast(self, strict: bool, safe: t.Optional[bool] = None) -> exp.Expression: 4286 this = self._parse_conjunction() 4287 4288 if not self._match(TokenType.ALIAS): 4289 if self._match(TokenType.COMMA): 4290 return self.expression(exp.CastToStrType, this=this, to=self._parse_string()) 4291 4292 self.raise_error("Expected AS after CAST") 4293 4294 fmt = None 4295 to = self._parse_types() 4296 4297 if self._match(TokenType.FORMAT): 4298 fmt_string = self._parse_string() 4299 fmt = self._parse_at_time_zone(fmt_string) 4300 4301 if not to: 4302 to = exp.DataType.build(exp.DataType.Type.UNKNOWN) 4303 if to.this in exp.DataType.TEMPORAL_TYPES: 4304 this = self.expression( 4305 exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime, 4306 this=this, 4307 format=exp.Literal.string( 4308 format_time( 4309 fmt_string.this if fmt_string else "", 4310 
self.FORMAT_MAPPING or self.TIME_MAPPING, 4311 self.FORMAT_TRIE or self.TIME_TRIE, 4312 ) 4313 ), 4314 ) 4315 4316 if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime): 4317 this.set("zone", fmt.args["zone"]) 4318 return this 4319 elif not to: 4320 self.raise_error("Expected TYPE after CAST") 4321 elif isinstance(to, exp.Identifier): 4322 to = exp.DataType.build(to.name, udt=True) 4323 elif to.this == exp.DataType.Type.CHAR: 4324 if self._match(TokenType.CHARACTER_SET): 4325 to = self.expression(exp.CharacterSet, this=self._parse_var_or_string()) 4326 4327 return self.expression( 4328 exp.Cast if strict else exp.TryCast, this=this, to=to, format=fmt, safe=safe 4329 ) 4330 4331 def _parse_concat(self) -> t.Optional[exp.Expression]: 4332 args = self._parse_csv(self._parse_conjunction) 4333 if self.CONCAT_NULL_OUTPUTS_STRING: 4334 args = self._ensure_string_if_null(args) 4335 4336 # Some dialects (e.g. Trino) don't allow a single-argument CONCAT call, so when 4337 # we find such a call we replace it with its argument. 
4338 if len(args) == 1: 4339 return args[0] 4340 4341 return self.expression( 4342 exp.Concat if self.STRICT_STRING_CONCAT else exp.SafeConcat, expressions=args 4343 ) 4344 4345 def _parse_concat_ws(self) -> t.Optional[exp.Expression]: 4346 args = self._parse_csv(self._parse_conjunction) 4347 if len(args) < 2: 4348 return self.expression(exp.ConcatWs, expressions=args) 4349 delim, *values = args 4350 if self.CONCAT_NULL_OUTPUTS_STRING: 4351 values = self._ensure_string_if_null(values) 4352 4353 return self.expression(exp.ConcatWs, expressions=[delim] + values) 4354 4355 def _parse_string_agg(self) -> exp.Expression: 4356 if self._match(TokenType.DISTINCT): 4357 args: t.List[t.Optional[exp.Expression]] = [ 4358 self.expression(exp.Distinct, expressions=[self._parse_conjunction()]) 4359 ] 4360 if self._match(TokenType.COMMA): 4361 args.extend(self._parse_csv(self._parse_conjunction)) 4362 else: 4363 args = self._parse_csv(self._parse_conjunction) # type: ignore 4364 4365 index = self._index 4366 if not self._match(TokenType.R_PAREN) and args: 4367 # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]]) 4368 # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n]) 4369 args[-1] = self._parse_limit(this=self._parse_order(this=args[-1])) 4370 return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1)) 4371 4372 # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]). 4373 # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that 4374 # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them. 
4375 if not self._match_text_seq("WITHIN", "GROUP"): 4376 self._retreat(index) 4377 return self.validate_expression(exp.GroupConcat.from_arg_list(args), args) 4378 4379 self._match_l_paren() # The corresponding match_r_paren will be called in parse_function (caller) 4380 order = self._parse_order(this=seq_get(args, 0)) 4381 return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1)) 4382 4383 def _parse_convert( 4384 self, strict: bool, safe: t.Optional[bool] = None 4385 ) -> t.Optional[exp.Expression]: 4386 this = self._parse_bitwise() 4387 4388 if self._match(TokenType.USING): 4389 to: t.Optional[exp.Expression] = self.expression( 4390 exp.CharacterSet, this=self._parse_var() 4391 ) 4392 elif self._match(TokenType.COMMA): 4393 to = self._parse_types() 4394 else: 4395 to = None 4396 4397 return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, safe=safe) 4398 4399 def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]: 4400 """ 4401 There are generally two variants of the DECODE function: 4402 4403 - DECODE(bin, charset) 4404 - DECODE(expression, search, result [, search, result] ... [, default]) 4405 4406 The second variant will always be parsed into a CASE expression. Note that NULL 4407 needs special treatment, since we need to explicitly check for it with `IS NULL`, 4408 instead of relying on pattern matching. 
4409 """ 4410 args = self._parse_csv(self._parse_conjunction) 4411 4412 if len(args) < 3: 4413 return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1)) 4414 4415 expression, *expressions = args 4416 if not expression: 4417 return None 4418 4419 ifs = [] 4420 for search, result in zip(expressions[::2], expressions[1::2]): 4421 if not search or not result: 4422 return None 4423 4424 if isinstance(search, exp.Literal): 4425 ifs.append( 4426 exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result) 4427 ) 4428 elif isinstance(search, exp.Null): 4429 ifs.append( 4430 exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result) 4431 ) 4432 else: 4433 cond = exp.or_( 4434 exp.EQ(this=expression.copy(), expression=search), 4435 exp.and_( 4436 exp.Is(this=expression.copy(), expression=exp.Null()), 4437 exp.Is(this=search.copy(), expression=exp.Null()), 4438 copy=False, 4439 ), 4440 copy=False, 4441 ) 4442 ifs.append(exp.If(this=cond, true=result)) 4443 4444 return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None) 4445 4446 def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]: 4447 self._match_text_seq("KEY") 4448 key = self._parse_column() 4449 self._match_set((TokenType.COLON, TokenType.COMMA)) 4450 self._match_text_seq("VALUE") 4451 value = self._parse_bitwise() 4452 4453 if not key and not value: 4454 return None 4455 return self.expression(exp.JSONKeyValue, this=key, expression=value) 4456 4457 def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4458 if not this or not self._match_text_seq("FORMAT", "JSON"): 4459 return this 4460 4461 return self.expression(exp.FormatJson, this=this) 4462 4463 def _parse_on_handling(self, on: str, *values: str) -> t.Optional[str]: 4464 # Parses the "X ON Y" syntax, i.e. 
# NULL ON NULL (Oracle, T-SQL)
        for value in values:
            if self._match_text_seq(value, "ON", on):
                return f"{value} ON {on}"

        return None

    def _parse_json_object(self) -> exp.JSONObject:
        """Parse the argument list of a JSON_OBJECT call into a JSONObject node."""
        # Either a lone `*` or a comma-separated list of key/value pairs,
        # each optionally wrapped in FORMAT JSON.
        star = self._parse_star()
        expressions = (
            [star]
            if star
            else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value()))
        )
        # e.g. NULL ON NULL / ABSENT ON NULL
        null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT")

        # WITH/WITHOUT UNIQUE [KEYS]; None means the clause was absent.
        unique_keys = None
        if self._match_text_seq("WITH", "UNIQUE"):
            unique_keys = True
        elif self._match_text_seq("WITHOUT", "UNIQUE"):
            unique_keys = False

        self._match_text_seq("KEYS")

        return_type = self._match_text_seq("RETURNING") and self._parse_format_json(
            self._parse_type()
        )
        encoding = self._match_text_seq("ENCODING") and self._parse_var()

        return self.expression(
            exp.JSONObject,
            expressions=expressions,
            null_handling=null_handling,
            unique_keys=unique_keys,
            return_type=return_type,
            encoding=encoding,
        )

    # Note: this is currently incomplete; it only implements the "JSON_value_column" part
    def _parse_json_column_def(self) -> exp.JSONColumnDef:
        """Parse one column definition inside a JSON_TABLE COLUMNS(...) clause."""
        if not self._match_text_seq("NESTED"):
            this = self._parse_id_var()
            kind = self._parse_types(allow_identifiers=False)
            nested = None
        else:
            # NESTED columns carry no name/type of their own, only a sub-schema.
            this = None
            kind = None
            nested = True

        path = self._match_text_seq("PATH") and self._parse_string()
        nested_schema = nested and self._parse_json_schema()

        return self.expression(
            exp.JSONColumnDef,
            this=this,
            kind=kind,
            path=path,
            nested_schema=nested_schema,
        )

    def _parse_json_schema(self) -> exp.JSONSchema:
        """Parse a COLUMNS(<json column defs>) schema clause."""
        self._match_text_seq("COLUMNS")
        return self.expression(
            exp.JSONSchema,
            expressions=self._parse_wrapped_csv(self._parse_json_column_def, optional=True),
        )

    def _parse_json_table(self) -> exp.JSONTable:
        """Parse the interior of a JSON_TABLE(...) call."""
        this = self._parse_format_json(self._parse_bitwise())
        path = self._match(TokenType.COMMA) and self._parse_string()
        # ERROR/NULL ON ERROR and ERROR/NULL ON EMPTY clauses.
        error_handling = self._parse_on_handling("ERROR", "ERROR", "NULL")
        empty_handling = self._parse_on_handling("EMPTY", "ERROR", "NULL")
        schema = self._parse_json_schema()

        return exp.JSONTable(
            this=this,
            schema=schema,
            path=path,
            error_handling=error_handling,
            empty_handling=empty_handling,
        )

    def _parse_logarithm(self) -> exp.Func:
        """Parse LOG arguments; dialect flags decide argument order and 1-arg meaning."""
        # Default argument order is base, expression
        args = self._parse_csv(self._parse_range)

        if len(args) > 1:
            # Dialects with LOG_BASE_FIRST=False pass (expression, base).
            if not self.LOG_BASE_FIRST:
                args.reverse()
            return exp.Log.from_arg_list(args)

        # Single argument: some dialects treat LOG(x) as LN(x).
        return self.expression(
            exp.Ln if self.LOG_DEFAULTS_TO_LN else exp.Log, this=seq_get(args, 0)
        )

    def _parse_match_against(self) -> exp.MatchAgainst:
        """Parse MySQL-style MATCH(cols) AGAINST('expr' [modifier])."""
        expressions = self._parse_csv(self._parse_column)

        self._match_text_seq(")", "AGAINST", "(")

        this = self._parse_string()

        # Search modifiers are captured verbatim as a string.
        if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"):
            modifier = "IN NATURAL LANGUAGE MODE"
            if self._match_text_seq("WITH", "QUERY", "EXPANSION"):
                modifier = f"{modifier} WITH QUERY EXPANSION"
        elif self._match_text_seq("IN", "BOOLEAN", "MODE"):
            modifier = "IN BOOLEAN MODE"
        elif self._match_text_seq("WITH", "QUERY", "EXPANSION"):
            modifier = "WITH QUERY EXPANSION"
        else:
            modifier = None

        return self.expression(
            exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier
        )

    # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16
    def _parse_open_json(self) -> exp.OpenJSON:
        """Parse T-SQL OPENJSON(expr [, path]) [WITH (...)]."""
        this = self._parse_bitwise()
        path = self._match(TokenType.COMMA) and self._parse_string()

        def _parse_open_json_column_def() -> exp.OpenJSONColumnDef:
            this =
self._parse_field(any_token=True)
            kind = self._parse_types()
            path = self._parse_string()
            # AS JSON marks the column as raw JSON output.
            as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON)

            return self.expression(
                exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json
            )

        expressions = None
        if self._match_pair(TokenType.R_PAREN, TokenType.WITH):
            self._match_l_paren()
            expressions = self._parse_csv(_parse_open_json_column_def)

        return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions)

    def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition:
        """Parse POSITION/LOCATE-style arguments into a StrPosition node.

        Supports both the `POSITION(needle IN haystack)` form and the
        comma-separated form, whose argument order is dialect-dependent
        (controlled by `haystack_first`).
        """
        args = self._parse_csv(self._parse_bitwise)

        if self._match(TokenType.IN):
            # POSITION(substr IN string)
            return self.expression(
                exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0)
            )

        if haystack_first:
            haystack = seq_get(args, 0)
            needle = seq_get(args, 1)
        else:
            needle = seq_get(args, 0)
            haystack = seq_get(args, 1)

        return self.expression(
            exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2)
        )

    def _parse_predict(self) -> exp.Predict:
        """Parse ML.PREDICT-style arguments: MODEL t, TABLE t2 [, params]."""
        self._match_text_seq("MODEL")
        this = self._parse_table()

        self._match(TokenType.COMMA)
        self._match_text_seq("TABLE")

        return self.expression(
            exp.Predict,
            this=this,
            expression=self._parse_table(),
            params_struct=self._match(TokenType.COMMA) and self._parse_bitwise(),
        )

    def _parse_join_hint(self, func_name: str) -> exp.JoinHint:
        """Parse a join hint's table list, e.g. BROADCAST(t1, t2)."""
        args = self._parse_csv(self._parse_table)
        return exp.JoinHint(this=func_name.upper(), expressions=args)

    def _parse_substring(self) -> exp.Substring:
        """Parse SUBSTRING arguments, including Postgres' FROM/FOR form."""
        # Postgres supports the form: substring(string [from int] [for int])
        # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6

        args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise))

        # FOR is only looked for after FROM was consumed.
        if self._match(TokenType.FROM):
            args.append(self._parse_bitwise())
            if self._match(TokenType.FOR):
                args.append(self._parse_bitwise())

        return self.validate_expression(exp.Substring.from_arg_list(args), args)

    def _parse_trim(self) -> exp.Trim:
        """Parse TRIM([LEADING|TRAILING|BOTH] [chars FROM] expr [COLLATE c])."""
        # https://www.w3resource.com/sql/character-functions/trim.php
        # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html

        position = None
        collation = None
        expression = None

        if self._match_texts(self.TRIM_TYPES):
            position = self._prev.text.upper()

        this = self._parse_bitwise()
        if self._match_set((TokenType.FROM, TokenType.COMMA)):
            # With FROM (or TRIM_PATTERN_FIRST dialects) the first operand was
            # the pattern, not the target, so the two are swapped below.
            invert_order = self._prev.token_type == TokenType.FROM or self.TRIM_PATTERN_FIRST
            expression = self._parse_bitwise()

            if invert_order:
                this, expression = expression, this

        if self._match(TokenType.COLLATE):
            collation = self._parse_bitwise()

        return self.expression(
            exp.Trim, this=this, position=position, expression=expression, collation=collation
        )

    def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]:
        """Parse `WINDOW name AS (...), ...` or return None when absent."""
        return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window)

    def _parse_named_window(self) -> t.Optional[exp.Expression]:
        """Parse one `name AS (window spec)` entry of a WINDOW clause."""
        return self._parse_window(self._parse_id_var(), alias=True)

    def _parse_respect_or_ignore_nulls(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        """Wrap `this` in IgnoreNulls/RespectNulls if the clause follows."""
        if self._match_text_seq("IGNORE", "NULLS"):
            return self.expression(exp.IgnoreNulls, this=this)
        if self._match_text_seq("RESPECT", "NULLS"):
            return self.expression(exp.RespectNulls, this=this)
        return this

    def _parse_window(
        self, this: t.Optional[exp.Expression], alias: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse FILTER/WITHIN GROUP/OVER windowing clauses around `this`.

        With `alias=True` this parses a named-window definition
        (`name AS (spec)`) instead of an OVER clause.
        """
        if self._match_pair(TokenType.FILTER, TokenType.L_PAREN):
            self._match(TokenType.WHERE)
            this =
self.expression(
                exp.Filter, this=this, expression=self._parse_where(skip_where_token=True)
            )
            self._match_r_paren()

        # T-SQL allows the OVER (...) syntax after WITHIN GROUP.
        # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16
        if self._match_text_seq("WITHIN", "GROUP"):
            order = self._parse_wrapped(self._parse_order)
            this = self.expression(exp.WithinGroup, this=this, expression=order)

        # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER
        # Some dialects choose to implement and some do not.
        # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html

        # There is some code above in _parse_lambda that handles
        #   SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ...
        # The below handles
        #   SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ...
        # Oracle allows both formats
        # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html)
        # and Snowflake chose to do the same for familiarity
        # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes
        this = self._parse_respect_or_ignore_nulls(this)

        # bigquery select from window x AS (partition by ...)
        if alias:
            over = None
            self._match(TokenType.ALIAS)
        elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS):
            # No OVER keyword: not a window expression at all.
            return this
        else:
            over = self._prev.text.upper()

        if not self._match(TokenType.L_PAREN):
            # OVER window_name (a reference to a named window).
            return self.expression(
                exp.Window, this=this, alias=self._parse_id_var(False), over=over
            )

        window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS)

        first = self._match(TokenType.FIRST)
        if self._match_text_seq("LAST"):
            first = False

        partition, order = self._parse_partition_and_order()
        kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text

        if kind:
            # Frame specification: [ROWS|RANGE] BETWEEN <spec> AND <spec>.
            self._match(TokenType.BETWEEN)
            start = self._parse_window_spec()
            self._match(TokenType.AND)
            end = self._parse_window_spec()

            spec = self.expression(
                exp.WindowSpec,
                kind=kind,
                start=start["value"],
                start_side=start["side"],
                end=end["value"],
                end_side=end["side"],
            )
        else:
            spec = None

        self._match_r_paren()

        window = self.expression(
            exp.Window,
            this=this,
            partition_by=partition,
            order=order,
            spec=spec,
            alias=window_alias,
            over=over,
            first=first,
        )

        # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...)
        if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False):
            return self._parse_window(window, alias=alias)

        return window

    def _parse_partition_and_order(
        self,
    ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]:
        """Parse the PARTITION BY and ORDER BY parts of a window spec."""
        return self._parse_partition_by(), self._parse_order()

    def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]:
        """Parse one frame boundary: UNBOUNDED/CURRENT ROW/expr plus its side."""
        self._match(TokenType.BETWEEN)

        return {
            "value": (
                (self._match_text_seq("UNBOUNDED") and "UNBOUNDED")
                or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW")
                or self._parse_bitwise()
            ),
            # PRECEDING/FOLLOWING (per WINDOW_SIDES), or False when absent.
            "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text,
        }

    def _parse_alias(
        self, this: t.Optional[exp.Expression], explicit: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse an optional [AS] alias (or parenthesized alias list) for `this`.

        With `explicit=True` an alias is only recognized after the AS keyword.
        """
        any_token = self._match(TokenType.ALIAS)

        if explicit and not any_token:
            return this

        if self._match(TokenType.L_PAREN):
            aliases = self.expression(
                exp.Aliases,
                this=this,
                expressions=self._parse_csv(lambda: self._parse_id_var(any_token)),
            )
            self._match_r_paren(aliases)
            return aliases

        alias = self._parse_id_var(any_token)

        if alias:
            return self.expression(exp.Alias, this=this, alias=alias)

        return this

    def _parse_id_var(
        self,
        any_token: bool = True,
        tokens: t.Optional[t.Collection[TokenType]] = None,
    ) -> t.Optional[exp.Expression]:
        """Parse an identifier-like token into an Identifier node.

        Falls back from a quoted identifier to any non-reserved token
        (when `any_token`) or to the given/ID_VAR_TOKENS token set.
        """
        identifier = self._parse_identifier()

        if identifier:
            return identifier

        if (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS):
            # String tokens used as identifiers keep their quoted-ness.
            quoted = self._prev.token_type == TokenType.STRING
            return exp.Identifier(this=self._prev.text, quoted=quoted)

        return None

    def _parse_string(self) -> t.Optional[exp.Expression]:
        """Parse a string literal, falling back to a placeholder."""
        if self._match(TokenType.STRING):
            return
self.PRIMARY_PARSERS[TokenType.STRING](self, self._prev)
        return self._parse_placeholder()

    def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]:
        """Parse a string literal and coerce it into a quoted identifier."""
        return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True)

    def _parse_number(self) -> t.Optional[exp.Expression]:
        """Parse a numeric literal, falling back to a placeholder."""
        if self._match(TokenType.NUMBER):
            return self.PRIMARY_PARSERS[TokenType.NUMBER](self, self._prev)
        return self._parse_placeholder()

    def _parse_identifier(self) -> t.Optional[exp.Expression]:
        """Parse a quoted identifier token, falling back to a placeholder."""
        if self._match(TokenType.IDENTIFIER):
            return self.expression(exp.Identifier, this=self._prev.text, quoted=True)
        return self._parse_placeholder()

    def _parse_var(
        self, any_token: bool = False, tokens: t.Optional[t.Collection[TokenType]] = None
    ) -> t.Optional[exp.Expression]:
        """Parse a VAR token (or any non-reserved/extra token) into a Var node."""
        if (
            (any_token and self._advance_any())
            or self._match(TokenType.VAR)
            or (self._match_set(tokens) if tokens else False)
        ):
            return self.expression(exp.Var, this=self._prev.text)
        return self._parse_placeholder()

    def _advance_any(self) -> t.Optional[Token]:
        """Consume and return the current token unless it is a reserved keyword."""
        if self._curr and self._curr.token_type not in self.RESERVED_KEYWORDS:
            self._advance()
            return self._prev
        return None

    def _parse_var_or_string(self) -> t.Optional[exp.Expression]:
        """Parse a Var, falling back to a string literal."""
        return self._parse_var() or self._parse_string()

    def _parse_null(self) -> t.Optional[exp.Expression]:
        """Parse a NULL literal, falling back to a placeholder."""
        if self._match_set(self.NULL_TOKENS):
            return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev)
        return self._parse_placeholder()

    def _parse_boolean(self) -> t.Optional[exp.Expression]:
        """Parse TRUE/FALSE literals, falling back to a placeholder."""
        if self._match(TokenType.TRUE):
            return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev)
        if self._match(TokenType.FALSE):
            return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev)
        return self._parse_placeholder()

    def _parse_star(self) -> t.Optional[exp.Expression]:
        """Parse a `*` token, falling back to a placeholder."""
        if self._match(TokenType.STAR):
            return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev)
        return self._parse_placeholder()

    def _parse_parameter(self) -> exp.Parameter:
        """Parse a parameter reference, optionally braced: {name[:part]}."""
        def _parse_parameter_part() -> t.Optional[exp.Expression]:
            return (
                self._parse_identifier() or self._parse_primary() or self._parse_var(any_token=True)
            )

        # Braces are optional; both `{x}` and bare `x` forms are accepted.
        self._match(TokenType.L_BRACE)
        this = _parse_parameter_part()
        expression = self._match(TokenType.COLON) and _parse_parameter_part()
        self._match(TokenType.R_BRACE)

        return self.expression(exp.Parameter, this=this, expression=expression)

    def _parse_placeholder(self) -> t.Optional[exp.Expression]:
        """Try the registered placeholder parsers; rewind if none produce a node."""
        if self._match_set(self.PLACEHOLDER_PARSERS):
            placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self)
            if placeholder:
                return placeholder
            # The token matched but didn't yield a placeholder: un-consume it.
            self._advance(-1)
        return None

    def _parse_except(self) -> t.Optional[t.List[exp.Expression]]:
        """Parse `EXCEPT (col, ...)` or a single `EXCEPT col` column list."""
        if not self._match(TokenType.EXCEPT):
            return None
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_column)

        except_column = self._parse_column()
        return [except_column] if except_column else None

    def _parse_replace(self) -> t.Optional[t.List[exp.Expression]]:
        """Parse `REPLACE (expr, ...)` or a single `REPLACE expr` list."""
        if not self._match(TokenType.REPLACE):
            return None
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_expression)

        replace_expression = self._parse_expression()
        return [replace_expression] if replace_expression else None

    def _parse_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA
    ) -> t.List[exp.Expression]:
        """Parse a `sep`-separated list using `parse_method`, dropping None results."""
        parse_result = parse_method()
        items = [parse_result] if parse_result is not None else []

        while self._match(sep):
            # Attach any comments that preceded the separator to the last item.
            self._add_comments(parse_result)
            parse_result =
parse_method()
            if parse_result is not None:
                items.append(parse_result)

        return items

    def _parse_tokens(
        self, parse_method: t.Callable, expressions: t.Dict
    ) -> t.Optional[exp.Expression]:
        """Left-fold a binary-operator chain: token type -> expression class."""
        this = parse_method()

        while self._match_set(expressions):
            this = self.expression(
                expressions[self._prev.token_type],
                this=this,
                comments=self._prev_comments,
                expression=parse_method(),
            )

        return this

    def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]:
        """Parse a (possibly optional) parenthesized list of identifiers."""
        return self._parse_wrapped_csv(self._parse_id_var, optional=optional)

    def _parse_wrapped_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False
    ) -> t.List[exp.Expression]:
        """Parse a parenthesized `sep`-separated list; parens optional if `optional`."""
        return self._parse_wrapped(
            lambda: self._parse_csv(parse_method, sep=sep), optional=optional
        )

    def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any:
        """Run `parse_method` inside (...); error on missing paren unless optional."""
        wrapped = self._match(TokenType.L_PAREN)
        if not wrapped and not optional:
            self.raise_error("Expecting (")
        parse_result = parse_method()
        if wrapped:
            self._match_r_paren()
        return parse_result

    def _parse_expressions(self) -> t.List[exp.Expression]:
        """Parse a comma-separated list of expressions."""
        return self._parse_csv(self._parse_expression)

    def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]:
        """Parse a SELECT, or fall back to a (possibly aliased) expression."""
        return self._parse_select() or self._parse_set_operations(
            self._parse_expression() if alias else self._parse_conjunction()
        )

    def _parse_ddl_select(self) -> t.Optional[exp.Expression]:
        """Parse the SELECT part of a DDL statement (e.g. CREATE TABLE AS)."""
        return self._parse_query_modifiers(
            self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False))
        )

    def _parse_transaction(self) -> exp.Transaction | exp.Command:
        """Parse BEGIN/START [kind] [TRANSACTION|WORK] [mode, ...]."""
        this = None
        if self._match_texts(self.TRANSACTION_KIND):
            this = self._prev.text

        self._match_texts({"TRANSACTION", "WORK"})

        # Each mode is a run of VAR tokens joined by spaces, separated by commas.
        modes = []
        while True:
            mode = []
            while self._match(TokenType.VAR):
                mode.append(self._prev.text)

            if mode:
                modes.append(" ".join(mode))
            if not self._match(TokenType.COMMA):
                break

        return self.expression(exp.Transaction, this=this, modes=modes)

    def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback:
        """Parse COMMIT/ROLLBACK [TO SAVEPOINT x] [AND [NO] CHAIN]."""
        chain = None
        savepoint = None
        # The COMMIT/ROLLBACK keyword itself was consumed by the caller.
        is_rollback = self._prev.token_type == TokenType.ROLLBACK

        self._match_texts({"TRANSACTION", "WORK"})

        if self._match_text_seq("TO"):
            self._match_text_seq("SAVEPOINT")
            savepoint = self._parse_id_var()

        if self._match(TokenType.AND):
            chain = not self._match_text_seq("NO")
            self._match_text_seq("CHAIN")

        if is_rollback:
            return self.expression(exp.Rollback, savepoint=savepoint)

        return self.expression(exp.Commit, chain=chain)

    def _parse_add_column(self) -> t.Optional[exp.Expression]:
        """Parse an ALTER TABLE `ADD [COLUMN] [IF NOT EXISTS] <def>` action."""
        if not self._match_text_seq("ADD"):
            return None

        self._match(TokenType.COLUMN)
        exists_column = self._parse_exists(not_=True)
        expression = self._parse_field_def()

        if expression:
            expression.set("exists", exists_column)

            # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns
            if self._match_texts(("FIRST", "AFTER")):
                position = self._prev.text
                column_position = self.expression(
                    exp.ColumnPosition, this=self._parse_column(), position=position
                )
                expression.set("position", column_position)

        return expression

    def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]:
        """Parse an ALTER TABLE `DROP [COLUMN] ...` action, defaulting kind to COLUMN."""
        drop = self._match(TokenType.DROP) and self._parse_drop()
        if drop and not isinstance(drop, exp.Command):
            drop.set("kind", drop.args.get("kind", "COLUMN"))
        return drop
    # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html
    def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition:
        """Parse the partition list of an ALTER TABLE ... DROP PARTITION action."""
        return self.expression(
            exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists
        )

    def _parse_add_constraint(self) -> exp.AddConstraint:
        """Parse one ADD [CONSTRAINT name] {CHECK|FOREIGN KEY|PRIMARY KEY} action."""
        this = None
        # The introducing token was already consumed by the caller.
        kind = self._prev.token_type

        if kind == TokenType.CONSTRAINT:
            this = self._parse_id_var()

            if self._match_text_seq("CHECK"):
                expression = self._parse_wrapped(self._parse_conjunction)
                enforced = self._match_text_seq("ENFORCED")

                return self.expression(
                    exp.AddConstraint, this=this, expression=expression, enforced=enforced
                )

        if kind == TokenType.FOREIGN_KEY or self._match(TokenType.FOREIGN_KEY):
            expression = self._parse_foreign_key()
        elif kind == TokenType.PRIMARY_KEY or self._match(TokenType.PRIMARY_KEY):
            expression = self._parse_primary_key()
        else:
            expression = None

        return self.expression(exp.AddConstraint, this=this, expression=expression)

    def _parse_alter_table_add(self) -> t.List[exp.Expression]:
        """Parse ALTER TABLE ADD: either constraints or column definitions."""
        index = self._index - 1

        if self._match_set(self.ADD_CONSTRAINT_TOKENS):
            return self._parse_csv(self._parse_add_constraint)

        # Not a constraint: rewind and parse column additions instead.
        self._retreat(index)
        if not self.ALTER_TABLE_ADD_COLUMN_KEYWORD and self._match_text_seq("ADD"):
            return self._parse_wrapped_csv(self._parse_field_def, optional=True)
        return self._parse_wrapped_csv(self._parse_add_column, optional=True)

    def _parse_alter_table_alter(self) -> exp.AlterColumn:
        """Parse ALTER [COLUMN] c {DROP/SET DEFAULT | [SET DATA] TYPE ...}."""
        self._match(TokenType.COLUMN)
        column = self._parse_field(any_token=True)

        if self._match_pair(TokenType.DROP, TokenType.DEFAULT):
            return self.expression(exp.AlterColumn, this=column, drop=True)
        if self._match_pair(TokenType.SET, TokenType.DEFAULT):
            return self.expression(exp.AlterColumn, this=column, default=self._parse_conjunction())

        self._match_text_seq("SET", "DATA")
        return self.expression(
            exp.AlterColumn,
            this=column,
            dtype=self._match_text_seq("TYPE") and self._parse_types(),
            collate=self._match(TokenType.COLLATE) and self._parse_term(),
            using=self._match(TokenType.USING) and self._parse_conjunction(),
        )

    def _parse_alter_table_drop(self) -> t.List[exp.Expression]:
        """Parse ALTER TABLE DROP: partitions if present, otherwise columns."""
        index = self._index - 1

        partition_exists = self._parse_exists()
        if self._match(TokenType.PARTITION, advance=False):
            return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists))

        self._retreat(index)
        return self._parse_csv(self._parse_drop_column)

    def _parse_alter_table_rename(self) -> exp.RenameTable:
        """Parse ALTER TABLE ... RENAME TO <table>."""
        self._match_text_seq("TO")
        return self.expression(exp.RenameTable, this=self._parse_table(schema=True))

    def _parse_alter(self) -> exp.AlterTable | exp.Command:
        """Parse ALTER TABLE; falls back to a raw Command for unsupported forms."""
        start = self._prev

        if not self._match(TokenType.TABLE):
            return self._parse_as_command(start)

        exists = self._parse_exists()
        only = self._match_text_seq("ONLY")
        this = self._parse_table(schema=True)

        if self._next:
            self._advance()

        # The action keyword (ADD, DROP, RENAME, ...) selects a sub-parser.
        parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None
        if parser:
            actions = ensure_list(parser(self))

            # Only build an AlterTable if everything was consumed; otherwise
            # fall back to an opaque Command preserving the raw SQL.
            if not self._curr:
                return self.expression(
                    exp.AlterTable,
                    this=this,
                    exists=exists,
                    actions=actions,
                    only=only,
                )

        return self._parse_as_command(start)

    def _parse_merge(self) -> exp.Merge:
        """Parse MERGE [INTO] target USING source ON condition WHEN ..."""
        self._match(TokenType.INTO)
        target = self._parse_table()

        if target and self._match(TokenType.ALIAS, advance=False):
            target.set("alias", self._parse_table_alias())

        self._match(TokenType.USING)
        using = self._parse_table()

        self._match(TokenType.ON)
        on =
self._parse_conjunction()

        return self.expression(
            exp.Merge,
            this=target,
            using=using,
            on=on,
            expressions=self._parse_when_matched(),
        )

    def _parse_when_matched(self) -> t.List[exp.When]:
        """Parse the WHEN [NOT] MATCHED ... THEN clauses of a MERGE statement."""
        whens = []

        while self._match(TokenType.WHEN):
            matched = not self._match(TokenType.NOT)
            self._match_text_seq("MATCHED")
            # source=False for BY TARGET, True for BY SOURCE, False when absent.
            source = (
                False
                if self._match_text_seq("BY", "TARGET")
                else self._match_text_seq("BY", "SOURCE")
            )
            condition = self._parse_conjunction() if self._match(TokenType.AND) else None

            self._match(TokenType.THEN)

            if self._match(TokenType.INSERT):
                _this = self._parse_star()
                if _this:
                    # INSERT * shorthand.
                    then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=_this)
                else:
                    then = self.expression(
                        exp.Insert,
                        this=self._parse_value(),
                        expression=self._match(TokenType.VALUES) and self._parse_value(),
                    )
            elif self._match(TokenType.UPDATE):
                expressions = self._parse_star()
                if expressions:
                    # UPDATE * shorthand.
                    then = self.expression(exp.Update, expressions=expressions)
                else:
                    then = self.expression(
                        exp.Update,
                        expressions=self._match(TokenType.SET)
                        and self._parse_csv(self._parse_equality),
                    )
            elif self._match(TokenType.DELETE):
                then = self.expression(exp.Var, this=self._prev.text)
            else:
                then = None

            whens.append(
                self.expression(
                    exp.When,
                    matched=matched,
                    source=source,
                    condition=condition,
                    then=then,
                )
            )
        return whens

    def _parse_show(self) -> t.Optional[exp.Expression]:
        """Parse a SHOW statement via registered sub-parsers, else a raw Command."""
        parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE)
        if parser:
            return parser(self)
        return self._parse_as_command(self._prev)

    def _parse_set_item_assignment(
        self, kind: t.Optional[str] = None
    ) -> t.Optional[exp.Expression]:
        """Parse one `name = value` (or `name TO value`) SET item."""
        index = self._index

        if kind in {"GLOBAL", "SESSION"} and self._match_text_seq("TRANSACTION"):
            return self._parse_set_transaction(global_=kind == "GLOBAL")

        left = self._parse_primary() or self._parse_id_var()
        assignment_delimiter = self._match_texts(("=", "TO"))

        if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter):
            # Not an assignment: rewind so the caller can try something else.
            self._retreat(index)
            return None

        right = self._parse_statement() or self._parse_id_var()
        this = self.expression(exp.EQ, this=left, expression=right)

        return self.expression(exp.SetItem, this=this, kind=kind)

    def _parse_set_transaction(self, global_: bool = False) -> exp.Expression:
        """Parse SET [GLOBAL|SESSION] TRANSACTION <characteristics>."""
        self._match_text_seq("TRANSACTION")
        characteristics = self._parse_csv(
            lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS)
        )
        return self.expression(
            exp.SetItem,
            expressions=characteristics,
            kind="TRANSACTION",
            # "global" is a Python keyword, hence the dict-splat.
            **{"global": global_},  # type: ignore
        )

    def _parse_set_item(self) -> t.Optional[exp.Expression]:
        """Parse one SET item via registered sub-parsers, else as an assignment."""
        parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE)
        return parser(self) if parser else self._parse_set_item_assignment(kind=None)

    def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command:
        """Parse a SET statement; falls back to a raw Command on leftovers."""
        index = self._index
        set_ = self.expression(
            exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag
        )

        if self._curr:
            # Not everything was consumed: preserve the raw SQL instead.
            self._retreat(index)
            return self._parse_as_command(self._prev)

        return set_

    def _parse_var_from_options(self, options: t.Collection[str]) -> t.Optional[exp.Var]:
        """Match one of the (possibly multi-word) options and return it as a Var."""
        for option in options:
            if self._match_text_seq(*option.split(" ")):
                return exp.var(option)
        return None

    def _parse_as_command(self, start: Token) -> exp.Command:
        """Consume the rest of the statement verbatim into a Command node."""
        while self._curr:
            self._advance()
        text = self._find_sql(start, self._prev)
        # Split the leading keyword from the remainder of the raw SQL.
        size = len(start.text)
        return exp.Command(this=text[:size],
expression=text[size:])

    def _parse_dict_property(self, this: str) -> exp.DictProperty:
        """Parse a ClickHouse-style dictionary property: KIND(k v, ...)."""
        settings = []

        self._match_l_paren()
        kind = self._parse_id_var()

        if self._match(TokenType.L_PAREN):
            while True:
                key = self._parse_id_var()
                value = self._parse_primary()

                if not key and value is None:
                    break
                settings.append(self.expression(exp.DictSubProperty, this=key, value=value))
            self._match(TokenType.R_PAREN)

        self._match_r_paren()

        return self.expression(
            exp.DictProperty,
            this=this,
            kind=kind.this if kind else None,
            settings=settings,
        )

    def _parse_dict_range(self, this: str) -> exp.DictRange:
        """Parse a dictionary RANGE(MIN x MAX y) / RANGE(MAX y) clause."""
        self._match_l_paren()
        has_min = self._match_text_seq("MIN")
        # NOTE(review): `min`/`max` mirror the DictRange arg names but shadow
        # the builtins within this scope.
        if has_min:
            min = self._parse_var() or self._parse_primary()
            self._match_text_seq("MAX")
            max = self._parse_var() or self._parse_primary()
        else:
            # MIN omitted: it defaults to 0.
            max = self._parse_var() or self._parse_primary()
            min = exp.Literal.number(0)
        self._match_r_paren()
        return self.expression(exp.DictRange, this=this, min=min, max=max)

    def _parse_comprehension(self, this: exp.Expression) -> t.Optional[exp.Comprehension]:
        """Parse a comprehension tail: `expr FOR x IN it [IF cond]` (FOR consumed)."""
        index = self._index
        expression = self._parse_column()
        if not self._match(TokenType.IN):
            # Rewind past the FOR token as well.
            self._retreat(index - 1)
            return None
        iterator = self._parse_column()
        condition = self._parse_conjunction() if self._match_text_seq("IF") else None
        return self.expression(
            exp.Comprehension,
            this=this,
            expression=expression,
            iterator=iterator,
            condition=condition,
        )

    def _find_parser(
        self, parsers: t.Dict[str, t.Callable], trie: t.Dict
    ) -> t.Optional[t.Callable]:
        """Walk `trie` over upcoming tokens to find a multi-word keyword parser.

        Restores the token position when no registered key matches.
        """
        if not self._curr:
            return None

        index = self._index
        this = []
        while True:
            # The current token might be multiple words
            curr = self._curr.text.upper()
            key = curr.split(" ")
            this.append(curr)

            self._advance()
            result, trie = in_trie(trie, key)
            if result == TrieResult.FAILED:
                break

            if result == TrieResult.EXISTS:
                subparser = parsers[" ".join(this)]
                return subparser

        self._retreat(index)
        return None

    def _match(
        self,
        token_type: TokenType,
        advance: bool = True,
        expression: t.Optional[exp.Expression] = None,
    ) -> t.Optional[bool]:
        """Return True (consuming the token if `advance`) when the current
        token has the given type; attach pending comments to `expression`."""
        if not self._curr:
            return None

        if self._curr.token_type == token_type:
            if advance:
                self._advance()
            self._add_comments(expression)
            return True

        return None

    def _match_set(self, types: t.Collection[TokenType], advance: bool = True) -> t.Optional[bool]:
        """Like _match, but accepts any token type in `types`."""
        if not self._curr:
            return None

        if self._curr.token_type in types:
            if advance:
                self._advance()
            return True

        return None

    def _match_pair(
        self, token_type_a: TokenType, token_type_b: TokenType, advance: bool = True
    ) -> t.Optional[bool]:
        """Match two consecutive token types, consuming both if `advance`."""
        if not self._curr or not self._next:
            return None

        if self._curr.token_type == token_type_a and self._next.token_type == token_type_b:
            if advance:
                self._advance(2)
            return True

        return None

    def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None:
        """Require a `(` token, raising a parse error otherwise."""
        if not self._match(TokenType.L_PAREN, expression=expression):
            self.raise_error("Expecting (")

    def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None:
        """Require a `)` token, raising a parse error otherwise."""
        if not self._match(TokenType.R_PAREN, expression=expression):
            self.raise_error("Expecting )")

    def _match_texts(self, texts: t.Collection[str], advance: bool = True) -> bool:
        """Case-insensitively match the current token's text against `texts`."""
        if self._curr and self._curr.text.upper() in texts:
            if advance:
                self._advance()
            return True
        return False

    def _match_text_seq(self, *texts: str, advance: bool = True) -> bool:
        """Case-insensitively match a sequence of token texts, all-or-nothing."""
        index = self._index
        for text in texts:
            if self._curr and self._curr.text.upper() == text:
                self._advance()
            else:
                # Partial match: restore the position entirely.
                self._retreat(index)
                return False

        if not advance:
            self._retreat(index)

        return True

    @t.overload
    def _replace_columns_with_dots(self, this: exp.Expression) -> exp.Expression:
        ...

    @t.overload
    def _replace_columns_with_dots(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        ...

    def _replace_columns_with_dots(self, this):
        """Recursively rewrite Column nodes as Dot chains (table.column)."""
        if isinstance(this, exp.Dot):
            exp.replace_children(this, self._replace_columns_with_dots)
        elif isinstance(this, exp.Column):
            exp.replace_children(this, self._replace_columns_with_dots)
            table = this.args.get("table")
            this = (
                self.expression(exp.Dot, this=table, expression=this.this) if table else this.this
            )

        return this

    def _replace_lambda(
        self, node: t.Optional[exp.Expression], lambda_variables: t.Set[str]
    ) -> t.Optional[exp.Expression]:
        """Replace columns named after lambda parameters with bare identifiers.

        For a column whose first part is a lambda variable, the column (or the
        outermost Dot chain containing it) is replaced by the corresponding
        identifier / dot expression.
        """
        if not node:
            return node

        for column in node.find_all(exp.Column):
            if column.parts[0].name in lambda_variables:
                dot_or_id = column.to_dot() if column.table else column.this
                parent = column.parent

                # Replace at the top of any enclosing Dot chain.
                while isinstance(parent, exp.Dot):
                    if not isinstance(parent.parent, exp.Dot):
                        parent.replace(dot_or_id)
                        break
                    parent = parent.parent
                else:
                    if column is node:
                        node = dot_or_id
                    else:
                        column.replace(dot_or_id)
        return node

    def _ensure_string_if_null(self, values: t.List[exp.Expression]) -> t.List[exp.Expression]:
        """Wrap each value in COALESCE(CAST(v AS text), '') to null-proof it."""
        return [
            exp.func("COALESCE", exp.cast(value, "text"), exp.Literal.string(""))
            for value in values
            if value
        ]
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: Determines the amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
def __init__(
    self,
    error_level: t.Optional[ErrorLevel] = None,
    error_message_context: int = 100,
    max_errors: int = 3,
):
    """
    Args:
        error_level: The desired error level. Default: ErrorLevel.IMMEDIATE.
        error_message_context: Number of characters of query context to capture
            when displaying an error message. Default: 100.
        max_errors: Maximum number of error messages in a raised ParseError; only
            relevant when error_level is ErrorLevel.RAISE. Default: 3.
    """
    self.error_level = error_level if error_level else ErrorLevel.IMMEDIATE
    self.error_message_context = error_message_context
    self.max_errors = max_errors
    self._tokenizer = self.TOKENIZER_CLASS()  # dialect-specific tokenizer instance
    self.reset()  # initialize the mutable per-parse state
def parse(
    self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
) -> t.List[t.Optional[exp.Expression]]:
    """
    Parses a list of tokens and returns a list of syntax trees, one tree
    per parsed SQL statement.

    Args:
        raw_tokens: The list of tokens.
        sql: The original SQL string, used to produce helpful debug messages.

    Returns:
        The list of produced syntax trees.
    """
    # Use the unbound class attribute so subclasses' overrides are picked up.
    statement_parser = self.__class__._parse_statement
    return self._parse(parse_method=statement_parser, raw_tokens=raw_tokens, sql=sql)
Parses a list of tokens and returns a list of syntax trees, one tree per parsed SQL statement.
Arguments:
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The list of produced syntax trees.
def parse_into(
    self,
    expression_types: exp.IntoType,
    raw_tokens: t.List[Token],
    sql: t.Optional[str] = None,
) -> t.List[t.Optional[exp.Expression]]:
    """
    Parses a list of tokens into a given Expression type. If a collection of Expression
    types is given instead, this method will try to parse the token list into each one
    of them, stopping at the first for which the parsing succeeds.

    Args:
        expression_types: The expression type(s) to try and parse the token list into.
        raw_tokens: The list of tokens.
        sql: The original SQL string, used to produce helpful debug messages.

    Returns:
        The target Expression.

    Raises:
        TypeError: If no parser is registered for one of the expression types.
        ParseError: If the tokens can't be parsed into any of the given types.
    """
    errors = []
    for expression_type in ensure_list(expression_types):
        parser = self.EXPRESSION_PARSERS.get(expression_type)
        if not parser:
            raise TypeError(f"No parser registered for {expression_type}")

        try:
            return self._parse(parser, raw_tokens, sql)
        except ParseError as e:
            # Record which target type this attempt was for, then try the next one.
            e.errors[0]["into_expression"] = expression_type
            errors.append(e)

    # Fix: when `expression_types` is empty, `errors` is empty too and the original
    # `from errors[-1]` raised an IndexError, masking the intended ParseError.
    raise ParseError(
        f"Failed to parse '{sql or raw_tokens}' into {expression_types}",
        errors=merge_errors(errors),
    ) from (errors[-1] if errors else None)
Parses a list of tokens into a given Expression type. If a collection of Expression types is given instead, this method will try to parse the token list into each one of them, stopping at the first for which the parsing succeeds.
Arguments:
- expression_types: The expression type(s) to try and parse the token list into.
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The target Expression.
def check_errors(self) -> None:
    """Logs or raises any found errors, depending on the chosen error level setting."""
    if self.error_level == ErrorLevel.WARN:
        for recorded_error in self.errors:
            logger.error(str(recorded_error))
        return

    if self.error_level == ErrorLevel.RAISE and self.errors:
        raise ParseError(
            concat_messages(self.errors, self.max_errors),
            errors=merge_errors(self.errors),
        )
Logs or raises any found errors, depending on the chosen error level setting.
def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
    """
    Appends an error in the list of recorded errors or raises it, depending on the chosen
    error level setting.
    """
    # Fall back to the current/previous token (or an empty one) to locate the error.
    token = token or self._curr or self._prev or Token.string("")
    tok_start = token.start
    tok_end = token.end + 1

    # Slice out the SQL around the offending token so the message shows context.
    before = self.sql[max(tok_start - self.error_message_context, 0) : tok_start]
    during = self.sql[tok_start:tok_end]
    after = self.sql[tok_end : tok_end + self.error_message_context]

    error = ParseError.new(
        f"{message}. Line {token.line}, Col: {token.col}.\n"
        f"  {before}\033[4m{during}\033[0m{after}",  # ANSI-underline the bad span
        description=message,
        line=token.line,
        col=token.col,
        start_context=before,
        highlight=during,
        end_context=after,
    )

    if self.error_level == ErrorLevel.IMMEDIATE:
        raise error

    self.errors.append(error)
Appends an error in the list of recorded errors or raises it, depending on the chosen error level setting.
def expression(
    self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs
) -> E:
    """
    Creates a new, validated Expression.

    Args:
        exp_class: The expression class to instantiate.
        comments: An optional list of comments to attach to the expression.
        kwargs: The arguments to set for the expression along with their respective values.

    Returns:
        The target expression.
    """
    instance = exp_class(**kwargs)
    if comments:
        instance.add_comments(comments)
    else:
        # No explicit comments given: defer to the parser's own comment attachment.
        self._add_comments(instance)
    return self.validate_expression(instance)
Creates a new, validated Expression.
Arguments:
- exp_class: The expression class to instantiate.
- comments: An optional list of comments to attach to the expression.
- kwargs: The arguments to set for the expression along with their respective values.
Returns:
The target expression.
def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E:
    """
    Validates an Expression, making sure that all its mandatory arguments are set.

    Args:
        expression: The expression to validate.
        args: An optional list of items that was used to instantiate the expression,
            if it's a Func.

    Returns:
        The validated expression.
    """
    if self.error_level == ErrorLevel.IGNORE:
        return expression

    for error_message in expression.error_messages(args):
        self.raise_error(error_message)

    return expression
Validates an Expression, making sure that all its mandatory arguments are set.
Arguments:
- expression: The expression to validate.
- args: An optional list of items that was used to instantiate the expression, if it's a Func.
Returns:
The validated expression.