sqlglot.parser
from __future__ import annotations

import logging
import typing as t
from collections import defaultdict

from sqlglot import exp
from sqlglot.errors import ErrorLevel, ParseError, concat_messages, merge_errors
from sqlglot.helper import apply_index_offset, ensure_list, seq_get
from sqlglot.time import format_time
from sqlglot.tokens import Token, Tokenizer, TokenType
from sqlglot.trie import TrieResult, in_trie, new_trie

if t.TYPE_CHECKING:
    from sqlglot._typing import E

logger = logging.getLogger("sqlglot")


def parse_var_map(args: t.List) -> exp.StarMap | exp.VarMap:
    if len(args) == 1 and args[0].is_star:
        return exp.StarMap(this=args[0])

    keys = []
    values = []
    for i in range(0, len(args), 2):
        keys.append(args[i])
        values.append(args[i + 1])

    return exp.VarMap(
        keys=exp.Array(expressions=keys),
        values=exp.Array(expressions=values),
    )


def parse_like(args: t.List) -> exp.Escape | exp.Like:
    like = exp.Like(this=seq_get(args, 1), expression=seq_get(args, 0))
    return exp.Escape(this=like, expression=seq_get(args, 2)) if len(args) > 2 else like


def binary_range_parser(
    expr_type: t.Type[exp.Expression],
) -> t.Callable[[Parser, t.Optional[exp.Expression]], t.Optional[exp.Expression]]:
    return lambda self, this: self._parse_escape(
        self.expression(expr_type, this=this, expression=self._parse_bitwise())
    )


class _Parser(type):
    def __new__(cls, clsname, bases, attrs):
        klass = super().__new__(cls, clsname, bases, attrs)

        klass.SHOW_TRIE = new_trie(key.split(" ") for key in klass.SHOW_PARSERS)
        klass.SET_TRIE = new_trie(key.split(" ") for key in klass.SET_PARSERS)

        return klass


class Parser(metaclass=_Parser):
    """
    Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.

    Args:
        error_level: The desired error level.
            Default: ErrorLevel.IMMEDIATE
        error_message_context: Determines the amount of context to capture from a
            query string when displaying the error message (in number of characters).
            Default: 100
        max_errors: Maximum number of error messages to include in a raised ParseError.
            This is only relevant if error_level is ErrorLevel.RAISE.
            Default: 3
    """

    FUNCTIONS: t.Dict[str, t.Callable] = {
        **{name: f.from_arg_list for f in exp.ALL_FUNCTIONS for name in f.sql_names()},
        "DATE_TO_DATE_STR": lambda args: exp.Cast(
            this=seq_get(args, 0),
            to=exp.DataType(this=exp.DataType.Type.TEXT),
        ),
        "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)),
        "LIKE": parse_like,
        "TIME_TO_TIME_STR": lambda args: exp.Cast(
            this=seq_get(args, 0),
            to=exp.DataType(this=exp.DataType.Type.TEXT),
        ),
        "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring(
            this=exp.Cast(
                this=seq_get(args, 0),
                to=exp.DataType(this=exp.DataType.Type.TEXT),
            ),
            start=exp.Literal.number(1),
            length=exp.Literal.number(10),
        ),
        "VAR_MAP": parse_var_map,
    }

    NO_PAREN_FUNCTIONS = {
        TokenType.CURRENT_DATE: exp.CurrentDate,
        TokenType.CURRENT_DATETIME: exp.CurrentDate,
        TokenType.CURRENT_TIME: exp.CurrentTime,
        TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp,
        TokenType.CURRENT_USER: exp.CurrentUser,
    }

    STRUCT_TYPE_TOKENS = {
        TokenType.NESTED,
        TokenType.STRUCT,
    }

    NESTED_TYPE_TOKENS = {
        TokenType.ARRAY,
        TokenType.LOWCARDINALITY,
        TokenType.MAP,
        TokenType.NULLABLE,
        *STRUCT_TYPE_TOKENS,
    }

    ENUM_TYPE_TOKENS = {
        TokenType.ENUM,
        TokenType.ENUM8,
        TokenType.ENUM16,
    }

    TYPE_TOKENS = {
        TokenType.BIT,
        TokenType.BOOLEAN,
        TokenType.TINYINT,
        TokenType.UTINYINT,
        TokenType.SMALLINT,
        TokenType.USMALLINT,
        TokenType.INT,
        TokenType.UINT,
        TokenType.BIGINT,
        TokenType.UBIGINT,
        TokenType.INT128,
        TokenType.UINT128,
        TokenType.INT256,
        TokenType.UINT256,
        TokenType.MEDIUMINT,
        TokenType.UMEDIUMINT,
        TokenType.FIXEDSTRING,
        TokenType.FLOAT,
        TokenType.DOUBLE,
        TokenType.CHAR,
        TokenType.NCHAR,
        TokenType.VARCHAR,
        TokenType.NVARCHAR,
        TokenType.TEXT,
        TokenType.MEDIUMTEXT,
        TokenType.LONGTEXT,
        TokenType.MEDIUMBLOB,
        TokenType.LONGBLOB,
        TokenType.BINARY,
        TokenType.VARBINARY,
        TokenType.JSON,
        TokenType.JSONB,
        TokenType.INTERVAL,
        TokenType.TINYBLOB,
        TokenType.TINYTEXT,
        TokenType.TIME,
        TokenType.TIMETZ,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMP_S,
        TokenType.TIMESTAMP_MS,
        TokenType.TIMESTAMP_NS,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
        TokenType.DATETIME,
        TokenType.DATETIME64,
        TokenType.DATE,
        TokenType.INT4RANGE,
        TokenType.INT4MULTIRANGE,
        TokenType.INT8RANGE,
        TokenType.INT8MULTIRANGE,
        TokenType.NUMRANGE,
        TokenType.NUMMULTIRANGE,
        TokenType.TSRANGE,
        TokenType.TSMULTIRANGE,
        TokenType.TSTZRANGE,
        TokenType.TSTZMULTIRANGE,
        TokenType.DATERANGE,
        TokenType.DATEMULTIRANGE,
        TokenType.DECIMAL,
        TokenType.UDECIMAL,
        TokenType.BIGDECIMAL,
        TokenType.UUID,
        TokenType.GEOGRAPHY,
        TokenType.GEOMETRY,
        TokenType.HLLSKETCH,
        TokenType.HSTORE,
        TokenType.PSEUDO_TYPE,
        TokenType.SUPER,
        TokenType.SERIAL,
        TokenType.SMALLSERIAL,
        TokenType.BIGSERIAL,
        TokenType.XML,
        TokenType.YEAR,
        TokenType.UNIQUEIDENTIFIER,
        TokenType.USERDEFINED,
        TokenType.MONEY,
        TokenType.SMALLMONEY,
        TokenType.ROWVERSION,
        TokenType.IMAGE,
        TokenType.VARIANT,
        TokenType.OBJECT,
        TokenType.OBJECT_IDENTIFIER,
        TokenType.INET,
        TokenType.IPADDRESS,
        TokenType.IPPREFIX,
        TokenType.UNKNOWN,
        TokenType.NULL,
        *ENUM_TYPE_TOKENS,
        *NESTED_TYPE_TOKENS,
    }
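
    # Example (added for illustration; not part of the original source): dialects
    # customize parsing by subclassing Parser inside a Dialect and extending these
    # class-level token sets. A minimal sketch, where TokenType.FOO stands in for a
    # hypothetical dialect-specific type token:
    #
    #     from sqlglot import parser
    #     from sqlglot.tokens import TokenType
    #
    #     class MyParser(parser.Parser):
    #         # TokenType.FOO is hypothetical; a real dialect would reference an
    #         # actual TokenType member emitted by its tokenizer.
    #         TYPE_TOKENS = {*parser.Parser.TYPE_TOKENS, TokenType.FOO}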

    SIGNED_TO_UNSIGNED_TYPE_TOKEN = {
        TokenType.BIGINT: TokenType.UBIGINT,
        TokenType.INT: TokenType.UINT,
        TokenType.MEDIUMINT: TokenType.UMEDIUMINT,
        TokenType.SMALLINT: TokenType.USMALLINT,
        TokenType.TINYINT: TokenType.UTINYINT,
        TokenType.DECIMAL: TokenType.UDECIMAL,
    }

    SUBQUERY_PREDICATES = {
        TokenType.ANY: exp.Any,
        TokenType.ALL: exp.All,
        TokenType.EXISTS: exp.Exists,
        TokenType.SOME: exp.Any,
    }

    RESERVED_KEYWORDS = {
        *Tokenizer.SINGLE_TOKENS.values(),
        TokenType.SELECT,
    }

    DB_CREATABLES = {
        TokenType.DATABASE,
        TokenType.SCHEMA,
        TokenType.TABLE,
        TokenType.VIEW,
        TokenType.MODEL,
        TokenType.DICTIONARY,
    }

    CREATABLES = {
        TokenType.COLUMN,
        TokenType.FUNCTION,
        TokenType.INDEX,
        TokenType.PROCEDURE,
        *DB_CREATABLES,
    }

    # Tokens that can represent identifiers
    ID_VAR_TOKENS = {
        TokenType.VAR,
        TokenType.ANTI,
        TokenType.APPLY,
        TokenType.ASC,
        TokenType.AUTO_INCREMENT,
        TokenType.BEGIN,
        TokenType.CACHE,
        TokenType.CASE,
        TokenType.COLLATE,
        TokenType.COMMAND,
        TokenType.COMMENT,
        TokenType.COMMIT,
        TokenType.CONSTRAINT,
        TokenType.DEFAULT,
        TokenType.DELETE,
        TokenType.DESC,
        TokenType.DESCRIBE,
        TokenType.DICTIONARY,
        TokenType.DIV,
        TokenType.END,
        TokenType.EXECUTE,
        TokenType.ESCAPE,
        TokenType.FALSE,
        TokenType.FIRST,
        TokenType.FILTER,
        TokenType.FORMAT,
        TokenType.FULL,
        TokenType.IS,
        TokenType.ISNULL,
        TokenType.INTERVAL,
        TokenType.KEEP,
        TokenType.KILL,
        TokenType.LEFT,
        TokenType.LOAD,
        TokenType.MERGE,
        TokenType.NATURAL,
        TokenType.NEXT,
        TokenType.OFFSET,
        TokenType.ORDINALITY,
        TokenType.OVERLAPS,
        TokenType.OVERWRITE,
        TokenType.PARTITION,
        TokenType.PERCENT,
        TokenType.PIVOT,
        TokenType.PRAGMA,
        TokenType.RANGE,
        TokenType.REFERENCES,
        TokenType.RIGHT,
        TokenType.ROW,
        TokenType.ROWS,
        TokenType.SEMI,
        TokenType.SET,
        TokenType.SETTINGS,
        TokenType.SHOW,
        TokenType.TEMPORARY,
        TokenType.TOP,
        TokenType.TRUE,
        TokenType.UNIQUE,
        TokenType.UNPIVOT,
        TokenType.UPDATE,
        TokenType.USE,
        TokenType.VOLATILE,
        TokenType.WINDOW,
        *CREATABLES,
        *SUBQUERY_PREDICATES,
        *TYPE_TOKENS,
        *NO_PAREN_FUNCTIONS,
    }

    INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END}

    TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - {
        TokenType.ANTI,
        TokenType.APPLY,
        TokenType.ASOF,
        TokenType.FULL,
        TokenType.LEFT,
        TokenType.LOCK,
        TokenType.NATURAL,
        TokenType.OFFSET,
        TokenType.RIGHT,
        TokenType.SEMI,
        TokenType.WINDOW,
    }

    COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS}

    UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET}

    TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"}

    FUNC_TOKENS = {
        TokenType.COLLATE,
        TokenType.COMMAND,
        TokenType.CURRENT_DATE,
        TokenType.CURRENT_DATETIME,
        TokenType.CURRENT_TIMESTAMP,
        TokenType.CURRENT_TIME,
        TokenType.CURRENT_USER,
        TokenType.FILTER,
        TokenType.FIRST,
        TokenType.FORMAT,
        TokenType.GLOB,
        TokenType.IDENTIFIER,
        TokenType.INDEX,
        TokenType.ISNULL,
        TokenType.ILIKE,
        TokenType.INSERT,
        TokenType.LIKE,
        TokenType.MERGE,
        TokenType.OFFSET,
        TokenType.PRIMARY_KEY,
        TokenType.RANGE,
        TokenType.REPLACE,
        TokenType.RLIKE,
        TokenType.ROW,
        TokenType.UNNEST,
        TokenType.VAR,
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.DATE,
        TokenType.DATETIME,
        TokenType.TABLE,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.WINDOW,
        TokenType.XOR,
        *TYPE_TOKENS,
        *SUBQUERY_PREDICATES,
    }

    CONJUNCTION = {
        TokenType.AND: exp.And,
        TokenType.OR: exp.Or,
    }

    EQUALITY = {
        TokenType.EQ: exp.EQ,
        TokenType.NEQ: exp.NEQ,
        TokenType.NULLSAFE_EQ: exp.NullSafeEQ,
    }

    COMPARISON = {
        TokenType.GT: exp.GT,
        TokenType.GTE: exp.GTE,
        TokenType.LT: exp.LT,
        TokenType.LTE: exp.LTE,
    }

    BITWISE = {
        TokenType.AMP: exp.BitwiseAnd,
        TokenType.CARET: exp.BitwiseXor,
        TokenType.PIPE: exp.BitwiseOr,
        TokenType.DPIPE: exp.DPipe,
    }

    TERM = {
        TokenType.DASH: exp.Sub,
        TokenType.PLUS: exp.Add,
        TokenType.MOD: exp.Mod,
        TokenType.COLLATE: exp.Collate,
    }

    FACTOR = {
        TokenType.DIV: exp.IntDiv,
        TokenType.LR_ARROW: exp.Distance,
        TokenType.SLASH: exp.Div,
        TokenType.STAR: exp.Mul,
    }

    TIMES = {
        TokenType.TIME,
        TokenType.TIMETZ,
    }

    TIMESTAMPS = {
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
        *TIMES,
    }

    SET_OPERATIONS = {
        TokenType.UNION,
        TokenType.INTERSECT,
        TokenType.EXCEPT,
    }

    JOIN_METHODS = {
        TokenType.NATURAL,
        TokenType.ASOF,
    }

    JOIN_SIDES = {
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.FULL,
    }

    JOIN_KINDS = {
        TokenType.INNER,
        TokenType.OUTER,
        TokenType.CROSS,
        TokenType.SEMI,
        TokenType.ANTI,
    }

    JOIN_HINTS: t.Set[str] = set()

    LAMBDAS = {
        TokenType.ARROW: lambda self, expressions: self.expression(
            exp.Lambda,
            this=self._replace_lambda(
                self._parse_conjunction(),
                {node.name for node in expressions},
            ),
            expressions=expressions,
        ),
        TokenType.FARROW: lambda self, expressions: self.expression(
            exp.Kwarg,
            this=exp.var(expressions[0].name),
            expression=self._parse_conjunction(),
        ),
    }

    COLUMN_OPERATORS = {
        TokenType.DOT: None,
        TokenType.DCOLON: lambda self, this, to: self.expression(
            exp.Cast if self.STRICT_CAST else exp.TryCast,
            this=this,
            to=to,
        ),
        TokenType.ARROW: lambda self, this, path: self.expression(
            exp.JSONExtract,
            this=this,
            expression=path,
        ),
        TokenType.DARROW: lambda self, this, path: self.expression(
            exp.JSONExtractScalar,
            this=this,
            expression=path,
        ),
        TokenType.HASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtract,
            this=this,
            expression=path,
        ),
        TokenType.DHASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtractScalar,
            this=this,
            expression=path,
        ),
        TokenType.PLACEHOLDER: lambda self, this, key: self.expression(
            exp.JSONBContains,
            this=this,
            expression=key,
        ),
    }
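
    # Example (added for illustration; not part of the original source): the
    # COLUMN_OPERATORS table above turns postfix column operators into nodes,
    # e.g. DCOLON into a cast and ARROW into a JSON extract:
    #
    #     import sqlglot
    #     from sqlglot import exp
    #
    #     ast = sqlglot.parse_one("SELECT a::INT, b -> 'x' FROM t", read="postgres")
    #     assert ast.find(exp.Cast) is not None         # from the DCOLON entry
    #     assert ast.find(exp.JSONExtract) is not None  # from the ARROW entry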

    EXPRESSION_PARSERS = {
        exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
        exp.Column: lambda self: self._parse_column(),
        exp.Condition: lambda self: self._parse_conjunction(),
        exp.DataType: lambda self: self._parse_types(allow_identifiers=False),
        exp.Expression: lambda self: self._parse_statement(),
        exp.From: lambda self: self._parse_from(),
        exp.Group: lambda self: self._parse_group(),
        exp.Having: lambda self: self._parse_having(),
        exp.Identifier: lambda self: self._parse_id_var(),
        exp.Join: lambda self: self._parse_join(),
        exp.Lambda: lambda self: self._parse_lambda(),
        exp.Lateral: lambda self: self._parse_lateral(),
        exp.Limit: lambda self: self._parse_limit(),
        exp.Offset: lambda self: self._parse_offset(),
        exp.Order: lambda self: self._parse_order(),
        exp.Ordered: lambda self: self._parse_ordered(),
        exp.Properties: lambda self: self._parse_properties(),
        exp.Qualify: lambda self: self._parse_qualify(),
        exp.Returning: lambda self: self._parse_returning(),
        exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY),
        exp.Table: lambda self: self._parse_table_parts(),
        exp.TableAlias: lambda self: self._parse_table_alias(),
        exp.Where: lambda self: self._parse_where(),
        exp.Window: lambda self: self._parse_named_window(),
        exp.With: lambda self: self._parse_with(),
        "JOIN_TYPE": lambda self: self._parse_join_parts(),
    }

    STATEMENT_PARSERS = {
        TokenType.ALTER: lambda self: self._parse_alter(),
        TokenType.BEGIN: lambda self: self._parse_transaction(),
        TokenType.CACHE: lambda self: self._parse_cache(),
        TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(),
        TokenType.COMMENT: lambda self: self._parse_comment(),
        TokenType.CREATE: lambda self: self._parse_create(),
        TokenType.DELETE: lambda self: self._parse_delete(),
        TokenType.DESC: lambda self: self._parse_describe(),
        TokenType.DESCRIBE: lambda self: self._parse_describe(),
        TokenType.DROP: lambda self: self._parse_drop(),
        TokenType.INSERT: lambda self: self._parse_insert(),
        TokenType.KILL: lambda self: self._parse_kill(),
        TokenType.LOAD: lambda self: self._parse_load(),
        TokenType.MERGE: lambda self: self._parse_merge(),
        TokenType.PIVOT: lambda self: self._parse_simplified_pivot(),
        TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()),
        TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(),
        TokenType.SET: lambda self: self._parse_set(),
        TokenType.UNCACHE: lambda self: self._parse_uncache(),
        TokenType.UPDATE: lambda self: self._parse_update(),
        TokenType.USE: lambda self: self.expression(
            exp.Use,
            kind=self._match_texts(("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA"))
            and exp.var(self._prev.text),
            this=self._parse_table(schema=False),
        ),
    }

    UNARY_PARSERS = {
        TokenType.PLUS: lambda self: self._parse_unary(),  # Unary + is handled as a no-op
        TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()),
        TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()),
        TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()),
    }

    PRIMARY_PARSERS = {
        TokenType.STRING: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=True
        ),
        TokenType.NUMBER: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=False
        ),
        TokenType.STAR: lambda self, _: self.expression(
            exp.Star, **{"except": self._parse_except(), "replace": self._parse_replace()}
        ),
        TokenType.NULL: lambda self, _: self.expression(exp.Null),
        TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True),
        TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False),
        TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text),
        TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text),
        TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text),
        TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token),
        TokenType.NATIONAL_STRING: lambda self, token: self.expression(
            exp.National, this=token.text
        ),
        TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text),
        TokenType.HEREDOC_STRING: lambda self, token: self.expression(
            exp.RawString, this=token.text
        ),
        TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(),
    }

    PLACEHOLDER_PARSERS = {
        TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder),
        TokenType.PARAMETER: lambda self: self._parse_parameter(),
        TokenType.COLON: lambda self: self.expression(exp.Placeholder, this=self._prev.text)
        if self._match(TokenType.NUMBER) or self._match_set(self.ID_VAR_TOKENS)
        else None,
    }

    RANGE_PARSERS = {
        TokenType.BETWEEN: lambda self, this: self._parse_between(this),
        TokenType.GLOB: binary_range_parser(exp.Glob),
        TokenType.ILIKE: binary_range_parser(exp.ILike),
        TokenType.IN: lambda self, this: self._parse_in(this),
        TokenType.IRLIKE: binary_range_parser(exp.RegexpILike),
        TokenType.IS: lambda self, this: self._parse_is(this),
        TokenType.LIKE: binary_range_parser(exp.Like),
        TokenType.OVERLAPS: binary_range_parser(exp.Overlaps),
        TokenType.RLIKE: binary_range_parser(exp.RegexpLike),
        TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo),
        TokenType.FOR: lambda self, this: self._parse_comprehension(this),
    }
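
    # Example (added for illustration; not part of the original source): entries
    # built with binary_range_parser run through _parse_escape, so a trailing
    # ESCAPE clause wraps the predicate in exp.Escape:
    #
    #     import sqlglot
    #     from sqlglot import exp
    #
    #     ast = sqlglot.parse_one("SELECT * FROM t WHERE a LIKE '5!%' ESCAPE '!'")
    #     escape = ast.find(exp.Escape)
    #     assert isinstance(escape.this, exp.Like)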

    PROPERTY_PARSERS: t.Dict[str, t.Callable] = {
        "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty),
        "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty),
        "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(),
        "CHARSET": lambda self, **kwargs: self._parse_character_set(**kwargs),
        "CHARACTER SET": lambda self, **kwargs: self._parse_character_set(**kwargs),
        "CHECKSUM": lambda self: self._parse_checksum(),
        "CLUSTER BY": lambda self: self._parse_cluster(),
        "CLUSTERED": lambda self: self._parse_clustered_by(),
        "COLLATE": lambda self, **kwargs: self._parse_property_assignment(
            exp.CollateProperty, **kwargs
        ),
        "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty),
        "COPY": lambda self: self._parse_copy_property(),
        "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs),
        "DEFINER": lambda self: self._parse_definer(),
        "DETERMINISTIC": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "DISTKEY": lambda self: self._parse_distkey(),
        "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty),
        "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty),
        "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty),
        "EXTERNAL": lambda self: self.expression(exp.ExternalProperty),
        "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs),
        "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "FREESPACE": lambda self: self._parse_freespace(),
        "HEAP": lambda self: self.expression(exp.HeapProperty),
        "IMMUTABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "INPUT": lambda self: self.expression(exp.InputModelProperty, this=self._parse_schema()),
        "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs),
        "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty),
        "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"),
        "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"),
        "LIKE": lambda self: self._parse_create_like(),
        "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty),
        "LOCK": lambda self: self._parse_locking(),
        "LOCKING": lambda self: self._parse_locking(),
        "LOG": lambda self, **kwargs: self._parse_log(**kwargs),
        "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty),
        "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs),
        "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True),
        "NO": lambda self: self._parse_no_property(),
        "ON": lambda self: self._parse_on_property(),
        "ORDER BY": lambda self: self._parse_order(skip_order_token=True),
        "OUTPUT": lambda self: self.expression(exp.OutputModelProperty, this=self._parse_schema()),
        "PARTITION BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED_BY": lambda self: self._parse_partitioned_by(),
        "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True),
        "RANGE": lambda self: self._parse_dict_range(this="RANGE"),
        "REMOTE": lambda self: self._parse_remote_with_connection(),
        "RETURNS": lambda self: self._parse_returns(),
        "ROW": lambda self: self._parse_row(),
        "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty),
        "SAMPLE": lambda self: self.expression(
            exp.SampleProperty, this=self._match_text_seq("BY") and self._parse_bitwise()
        ),
        "SET": lambda self: self.expression(exp.SetProperty, multi=False),
        "SETTINGS": lambda self: self.expression(
            exp.SettingsProperty, expressions=self._parse_csv(self._parse_set_item)
        ),
        "SORTKEY": lambda self: self._parse_sortkey(),
        "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"),
        "STABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("STABLE")
        ),
        "STORED": lambda self: self._parse_stored(),
        "TBLPROPERTIES": lambda self: self._parse_wrapped_csv(self._parse_property),
        "TEMP": lambda self: self.expression(exp.TemporaryProperty),
        "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty),
        "TO": lambda self: self._parse_to_table(),
        "TRANSIENT": lambda self: self.expression(exp.TransientProperty),
        "TRANSFORM": lambda self: self.expression(
            exp.TransformModelProperty, expressions=self._parse_wrapped_csv(self._parse_expression)
        ),
        "TTL": lambda self: self._parse_ttl(),
        "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "VOLATILE": lambda self: self._parse_volatile_property(),
        "WITH": lambda self: self._parse_with_property(),
    }

    CONSTRAINT_PARSERS = {
        "AUTOINCREMENT": lambda self: self._parse_auto_increment(),
        "AUTO_INCREMENT": lambda self: self._parse_auto_increment(),
        "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False),
        "CHARACTER SET": lambda self: self.expression(
            exp.CharacterSetColumnConstraint, this=self._parse_var_or_string()
        ),
        "CHECK": lambda self: self.expression(
            exp.CheckColumnConstraint, this=self._parse_wrapped(self._parse_conjunction)
        ),
        "COLLATE": lambda self: self.expression(
            exp.CollateColumnConstraint, this=self._parse_var()
        ),
        "COMMENT": lambda self: self.expression(
            exp.CommentColumnConstraint, this=self._parse_string()
        ),
        "COMPRESS": lambda self: self._parse_compress(),
        "CLUSTERED": lambda self: self.expression(
            exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered)
        ),
        "NONCLUSTERED": lambda self: self.expression(
            exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered)
        ),
        "DEFAULT": lambda self: self.expression(
            exp.DefaultColumnConstraint, this=self._parse_bitwise()
        ),
        "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()),
        "FOREIGN KEY": lambda self: self._parse_foreign_key(),
        "FORMAT": lambda self: self.expression(
            exp.DateFormatColumnConstraint, this=self._parse_var_or_string()
        ),
        "GENERATED": lambda self: self._parse_generated_as_identity(),
        "IDENTITY": lambda self: self._parse_auto_increment(),
        "INLINE": lambda self: self._parse_inline(),
        "LIKE": lambda self: self._parse_create_like(),
        "NOT": lambda self: self._parse_not_constraint(),
        "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True),
        "ON": lambda self: (
            self._match(TokenType.UPDATE)
            and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function())
        )
        or self.expression(exp.OnProperty, this=self._parse_id_var()),
        "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()),
        "PRIMARY KEY": lambda self: self._parse_primary_key(),
        "REFERENCES": lambda self: self._parse_references(match=False),
        "TITLE": lambda self: self.expression(
            exp.TitleColumnConstraint, this=self._parse_var_or_string()
        ),
        "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]),
        "UNIQUE": lambda self: self._parse_unique(),
        "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint),
        "WITH": lambda self: self.expression(
            exp.Properties, expressions=self._parse_wrapped_csv(self._parse_property)
        ),
    }

    ALTER_PARSERS = {
        "ADD": lambda self: self._parse_alter_table_add(),
        "ALTER": lambda self: self._parse_alter_table_alter(),
        "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()),
        "DROP": lambda self: self._parse_alter_table_drop(),
        "RENAME": lambda self: self._parse_alter_table_rename(),
    }

    SCHEMA_UNNAMED_CONSTRAINTS = {"CHECK", "FOREIGN KEY", "LIKE", "PRIMARY KEY", "UNIQUE"}

    NO_PAREN_FUNCTION_PARSERS = {
        "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()),
        "CASE": lambda self: self._parse_case(),
        "IF": lambda self: self._parse_if(),
        "NEXT": lambda self: self._parse_next_value_for(),
    }

    INVALID_FUNC_NAME_TOKENS = {
        TokenType.IDENTIFIER,
        TokenType.STRING,
    }

    FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"}
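
    # Example (added for illustration; not part of the original source): each key
    # in CONSTRAINT_PARSERS is the keyword that introduces a column constraint, so
    # the column definition below yields one ColumnConstraint node per match:
    #
    #     import sqlglot
    #     from sqlglot import exp
    #
    #     ast = sqlglot.parse_one("CREATE TABLE t (id INT NOT NULL DEFAULT 0)")
    #     kinds = [c.kind for c in ast.find_all(exp.ColumnConstraint)]
    #     # kinds contains a NotNullColumnConstraint and a DefaultColumnConstraint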
"JSON_OBJECT": lambda self: self._parse_json_object(), 802 "JSON_TABLE": lambda self: self._parse_json_table(), 803 "LOG": lambda self: self._parse_logarithm(), 804 "MATCH": lambda self: self._parse_match_against(), 805 "OPENJSON": lambda self: self._parse_open_json(), 806 "POSITION": lambda self: self._parse_position(), 807 "PREDICT": lambda self: self._parse_predict(), 808 "SAFE_CAST": lambda self: self._parse_cast(False, safe=True), 809 "STRING_AGG": lambda self: self._parse_string_agg(), 810 "SUBSTRING": lambda self: self._parse_substring(), 811 "TRIM": lambda self: self._parse_trim(), 812 "TRY_CAST": lambda self: self._parse_cast(False, safe=True), 813 "TRY_CONVERT": lambda self: self._parse_convert(False, safe=True), 814 } 815 816 QUERY_MODIFIER_PARSERS = { 817 TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()), 818 TokenType.WHERE: lambda self: ("where", self._parse_where()), 819 TokenType.GROUP_BY: lambda self: ("group", self._parse_group()), 820 TokenType.HAVING: lambda self: ("having", self._parse_having()), 821 TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()), 822 TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()), 823 TokenType.ORDER_BY: lambda self: ("order", self._parse_order()), 824 TokenType.LIMIT: lambda self: ("limit", self._parse_limit()), 825 TokenType.FETCH: lambda self: ("limit", self._parse_limit()), 826 TokenType.OFFSET: lambda self: ("offset", self._parse_offset()), 827 TokenType.FOR: lambda self: ("locks", self._parse_locks()), 828 TokenType.LOCK: lambda self: ("locks", self._parse_locks()), 829 TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 830 TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 831 TokenType.CLUSTER_BY: lambda self: ( 832 "cluster", 833 self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 834 ), 835 TokenType.DISTRIBUTE_BY: lambda self: ( 836 "distribute", 837 self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY), 838 ), 839 TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)), 840 TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)), 841 TokenType.START_WITH: lambda self: ("connect", self._parse_connect()), 842 } 843 844 SET_PARSERS = { 845 "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"), 846 "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"), 847 "SESSION": lambda self: self._parse_set_item_assignment("SESSION"), 848 "TRANSACTION": lambda self: self._parse_set_transaction(), 849 } 850 851 SHOW_PARSERS: t.Dict[str, t.Callable] = {} 852 853 TYPE_LITERAL_PARSERS = { 854 exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this), 855 } 856 857 MODIFIABLES = (exp.Subquery, exp.Subqueryable, exp.Table) 858 859 DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN} 860 861 PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE} 862 863 TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"} 864 TRANSACTION_CHARACTERISTICS = { 865 "ISOLATION LEVEL REPEATABLE READ", 866 "ISOLATION LEVEL READ COMMITTED", 867 "ISOLATION LEVEL READ UNCOMMITTED", 868 "ISOLATION LEVEL SERIALIZABLE", 869 "READ WRITE", 870 "READ ONLY", 871 } 872 873 INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"} 874 875 CLONE_KEYWORDS = {"CLONE", "COPY"} 876 CLONE_KINDS = {"TIMESTAMP", "OFFSET", "STATEMENT"} 877 878 OPCLASS_FOLLOW_KEYWORDS = 
{"ASC", "DESC", "NULLS"} 879 880 TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE} 881 882 WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS} 883 WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER} 884 WINDOW_SIDES = {"FOLLOWING", "PRECEDING"} 885 886 FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT} 887 888 ADD_CONSTRAINT_TOKENS = {TokenType.CONSTRAINT, TokenType.PRIMARY_KEY, TokenType.FOREIGN_KEY} 889 890 DISTINCT_TOKENS = {TokenType.DISTINCT} 891 892 NULL_TOKENS = {TokenType.NULL} 893 894 UNNEST_OFFSET_ALIAS_TOKENS = ID_VAR_TOKENS - SET_OPERATIONS 895 896 STRICT_CAST = True 897 898 # A NULL arg in CONCAT yields NULL by default 899 CONCAT_NULL_OUTPUTS_STRING = False 900 901 PREFIXED_PIVOT_COLUMNS = False 902 IDENTIFY_PIVOT_STRINGS = False 903 904 LOG_BASE_FIRST = True 905 LOG_DEFAULTS_TO_LN = False 906 907 # Whether or not ADD is present for each column added by ALTER TABLE 908 ALTER_TABLE_ADD_COLUMN_KEYWORD = True 909 910 # Whether or not the table sample clause expects CSV syntax 911 TABLESAMPLE_CSV = False 912 913 # Whether or not the SET command needs a delimiter (e.g. "=") for assignments 914 SET_REQUIRES_ASSIGNMENT_DELIMITER = True 915 916 # Whether the TRIM function expects the characters to trim as its first argument 917 TRIM_PATTERN_FIRST = False 918 919 __slots__ = ( 920 "error_level", 921 "error_message_context", 922 "max_errors", 923 "sql", 924 "errors", 925 "_tokens", 926 "_index", 927 "_curr", 928 "_next", 929 "_prev", 930 "_prev_comments", 931 "_tokenizer", 932 ) 933 934 # Autofilled 935 TOKENIZER_CLASS: t.Type[Tokenizer] = Tokenizer 936 INDEX_OFFSET: int = 0 937 UNNEST_COLUMN_ONLY: bool = False 938 ALIAS_POST_TABLESAMPLE: bool = False 939 STRICT_STRING_CONCAT = False 940 SUPPORTS_USER_DEFINED_TYPES = True 941 NORMALIZE_FUNCTIONS = "upper" 942 NULL_ORDERING: str = "nulls_are_small" 943 SHOW_TRIE: t.Dict = {} 944 SET_TRIE: t.Dict = {} 945 FORMAT_MAPPING: t.Dict[str, str] = {} 946 FORMAT_TRIE: t.Dict = {} 947 TIME_MAPPING: t.Dict[str, str] = {} 948 TIME_TRIE: t.Dict = {} 949 950 def __init__( 951 self, 952 error_level: t.Optional[ErrorLevel] = None, 953 error_message_context: int = 100, 954 max_errors: int = 3, 955 ): 956 self.error_level = error_level or ErrorLevel.IMMEDIATE 957 self.error_message_context = error_message_context 958 self.max_errors = max_errors 959 self._tokenizer = self.TOKENIZER_CLASS() 960 self.reset() 961 962 def reset(self): 963 self.sql = "" 964 self.errors = [] 965 self._tokens = [] 966 self._index = 0 967 self._curr = None 968 self._next = None 969 self._prev = None 970 self._prev_comments = None 971 972 def parse( 973 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 974 ) -> t.List[t.Optional[exp.Expression]]: 975 """ 976 Parses a list of tokens and returns a list of syntax trees, one tree 977 per parsed SQL statement. 978 979 Args: 980 raw_tokens: The list of tokens. 981 sql: The original SQL string, used to produce helpful debug messages. 982 983 Returns: 984 The list of the produced syntax trees. 985 """ 986 return self._parse( 987 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 988 ) 989 990 def parse_into( 991 self, 992 expression_types: exp.IntoType, 993 raw_tokens: t.List[Token], 994 sql: t.Optional[str] = None, 995 ) -> t.List[t.Optional[exp.Expression]]: 996 """ 997 Parses a list of tokens into a given Expression type. 

    def parse_into(
        self,
        expression_types: exp.IntoType,
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens into a given Expression type. If a collection of Expression
        types is given instead, this method will try to parse the token list into each one
        of them, stopping at the first for which the parsing succeeds.

        Args:
            expression_types: The expression type(s) to try and parse the token list into.
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The target Expression.
        """
        errors = []
        for expression_type in ensure_list(expression_types):
            parser = self.EXPRESSION_PARSERS.get(expression_type)
            if not parser:
                raise TypeError(f"No parser registered for {expression_type}")

            try:
                return self._parse(parser, raw_tokens, sql)
            except ParseError as e:
                e.errors[0]["into_expression"] = expression_type
                errors.append(e)

        raise ParseError(
            f"Failed to parse '{sql or raw_tokens}' into {expression_types}",
            errors=merge_errors(errors),
        ) from errors[-1]

    def _parse(
        self,
        parse_method: t.Callable[[Parser], t.Optional[exp.Expression]],
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        self.reset()
        self.sql = sql or ""

        total = len(raw_tokens)
        chunks: t.List[t.List[Token]] = [[]]

        for i, token in enumerate(raw_tokens):
            if token.token_type == TokenType.SEMICOLON:
                if i < total - 1:
                    chunks.append([])
            else:
                chunks[-1].append(token)

        expressions = []

        for tokens in chunks:
            self._index = -1
            self._tokens = tokens
            self._advance()

            expressions.append(parse_method(self))

            if self._index < len(self._tokens):
                self.raise_error("Invalid expression / Unexpected token")

            self.check_errors()

        return expressions

    def check_errors(self) -> None:
        """Logs or raises any found errors, depending on the chosen error level setting."""
        if self.error_level == ErrorLevel.WARN:
            for error in self.errors:
                logger.error(str(error))
        elif self.error_level == ErrorLevel.RAISE and self.errors:
            raise ParseError(
                concat_messages(self.errors, self.max_errors),
                errors=merge_errors(self.errors),
            )

    def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
        """
        Appends an error to the list of recorded errors or raises it, depending on the chosen
        error level setting.
        """
        token = token or self._curr or self._prev or Token.string("")
        start = token.start
        end = token.end + 1
        start_context = self.sql[max(start - self.error_message_context, 0) : start]
        highlight = self.sql[start:end]
        end_context = self.sql[end : end + self.error_message_context]

        error = ParseError.new(
            f"{message}. Line {token.line}, Col: {token.col}.\n"
            f"  {start_context}\033[4m{highlight}\033[0m{end_context}",
            description=message,
            line=token.line,
            col=token.col,
            start_context=start_context,
            highlight=highlight,
            end_context=end_context,
        )

        if self.error_level == ErrorLevel.IMMEDIATE:
            raise error

        self.errors.append(error)

    def expression(
        self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs
    ) -> E:
        """
        Creates a new, validated Expression.

        Args:
            exp_class: The expression class to instantiate.
            comments: An optional list of comments to attach to the expression.
            kwargs: The arguments to set for the expression along with their respective values.

        Returns:
            The target expression.
        """
        instance = exp_class(**kwargs)
        instance.add_comments(comments) if comments else self._add_comments(instance)
        return self.validate_expression(instance)

    def _add_comments(self, expression: t.Optional[exp.Expression]) -> None:
        if expression and self._prev_comments:
            expression.add_comments(self._prev_comments)
            self._prev_comments = None

    def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E:
        """
        Validates an Expression, making sure that all its mandatory arguments are set.

        Args:
            expression: The expression to validate.
            args: An optional list of items that was used to instantiate the expression, if it's a Func.

        Returns:
            The validated expression.
        """
        if self.error_level != ErrorLevel.IGNORE:
            for error_message in expression.error_messages(args):
                self.raise_error(error_message)

        return expression

    def _find_sql(self, start: Token, end: Token) -> str:
        return self.sql[start.start : end.end + 1]

    def _advance(self, times: int = 1) -> None:
        self._index += times
        self._curr = seq_get(self._tokens, self._index)
        self._next = seq_get(self._tokens, self._index + 1)

        if self._index > 0:
            self._prev = self._tokens[self._index - 1]
            self._prev_comments = self._prev.comments
        else:
            self._prev = None
            self._prev_comments = None

    def _retreat(self, index: int) -> None:
        if index != self._index:
            self._advance(index - self._index)

    def _parse_command(self) -> exp.Command:
        return self.expression(exp.Command, this=self._prev.text, expression=self._parse_string())

    def _parse_comment(self, allow_exists: bool = True) -> exp.Expression:
        start = self._prev
        exists = self._parse_exists() if allow_exists else None

        self._match(TokenType.ON)

        kind = self._match_set(self.CREATABLES) and self._prev
        if not kind:
            return self._parse_as_command(start)

        if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=kind.token_type)
        elif kind.token_type == TokenType.TABLE:
            this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS)
        elif kind.token_type == TokenType.COLUMN:
            this = self._parse_column()
        else:
            this = self._parse_id_var()

        self._match(TokenType.IS)

        return self.expression(
            exp.Comment, this=this, kind=kind.text, expression=self._parse_string(), exists=exists
        )

    def _parse_to_table(
        self,
    ) -> exp.ToTableProperty:
        table = self._parse_table_parts(schema=True)
        return self.expression(exp.ToTableProperty, this=table)
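
    # Example (added for illustration; not part of the original source): parse_into
    # backs the `into=` argument of sqlglot.parse_one, trying each candidate type's
    # entry in EXPRESSION_PARSERS until one succeeds:
    #
    #     import sqlglot
    #     from sqlglot import exp
    #
    #     condition = sqlglot.parse_one("x > 1 AND y < 2", into=exp.Condition)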
self._match_text_seq("TO", "DISK"): 1205 return self.expression( 1206 exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string() 1207 ) 1208 if self._match_text_seq("TO", "VOLUME"): 1209 return self.expression( 1210 exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string() 1211 ) 1212 1213 return this 1214 1215 expressions = self._parse_csv(_parse_ttl_action) 1216 where = self._parse_where() 1217 group = self._parse_group() 1218 1219 aggregates = None 1220 if group and self._match(TokenType.SET): 1221 aggregates = self._parse_csv(self._parse_set_item) 1222 1223 return self.expression( 1224 exp.MergeTreeTTL, 1225 expressions=expressions, 1226 where=where, 1227 group=group, 1228 aggregates=aggregates, 1229 ) 1230 1231 def _parse_statement(self) -> t.Optional[exp.Expression]: 1232 if self._curr is None: 1233 return None 1234 1235 if self._match_set(self.STATEMENT_PARSERS): 1236 return self.STATEMENT_PARSERS[self._prev.token_type](self) 1237 1238 if self._match_set(Tokenizer.COMMANDS): 1239 return self._parse_command() 1240 1241 expression = self._parse_expression() 1242 expression = self._parse_set_operations(expression) if expression else self._parse_select() 1243 return self._parse_query_modifiers(expression) 1244 1245 def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command: 1246 start = self._prev 1247 temporary = self._match(TokenType.TEMPORARY) 1248 materialized = self._match_text_seq("MATERIALIZED") 1249 1250 kind = self._match_set(self.CREATABLES) and self._prev.text 1251 if not kind: 1252 return self._parse_as_command(start) 1253 1254 return self.expression( 1255 exp.Drop, 1256 comments=start.comments, 1257 exists=exists or self._parse_exists(), 1258 this=self._parse_table(schema=True), 1259 kind=kind, 1260 temporary=temporary, 1261 materialized=materialized, 1262 cascade=self._match_text_seq("CASCADE"), 1263 constraints=self._match_text_seq("CONSTRAINTS"), 1264 purge=self._match_text_seq("PURGE"), 1265 ) 1266 1267 def _parse_exists(self, not_: bool = False) -> t.Optional[bool]: 1268 return ( 1269 self._match_text_seq("IF") 1270 and (not not_ or self._match(TokenType.NOT)) 1271 and self._match(TokenType.EXISTS) 1272 ) 1273 1274 def _parse_create(self) -> exp.Create | exp.Command: 1275 # Note: this can't be None because we've matched a statement parser 1276 start = self._prev 1277 comments = self._prev_comments 1278 1279 replace = start.text.upper() == "REPLACE" or self._match_pair( 1280 TokenType.OR, TokenType.REPLACE 1281 ) 1282 unique = self._match(TokenType.UNIQUE) 1283 1284 if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False): 1285 self._advance() 1286 1287 properties = None 1288 create_token = self._match_set(self.CREATABLES) and self._prev 1289 1290 if not create_token: 1291 # exp.Properties.Location.POST_CREATE 1292 properties = self._parse_properties() 1293 create_token = self._match_set(self.CREATABLES) and self._prev 1294 1295 if not properties or not create_token: 1296 return self._parse_as_command(start) 1297 1298 exists = self._parse_exists(not_=True) 1299 this = None 1300 expression: t.Optional[exp.Expression] = None 1301 indexes = None 1302 no_schema_binding = None 1303 begin = None 1304 end = None 1305 clone = None 1306 1307 def extend_props(temp_props: t.Optional[exp.Properties]) -> None: 1308 nonlocal properties 1309 if properties and temp_props: 1310 properties.expressions.extend(temp_props.expressions) 1311 elif temp_props: 1312 properties = temp_props 1313 1314 if create_token.token_type in (TokenType.FUNCTION, 
TokenType.PROCEDURE): 1315 this = self._parse_user_defined_function(kind=create_token.token_type) 1316 1317 # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature) 1318 extend_props(self._parse_properties()) 1319 1320 self._match(TokenType.ALIAS) 1321 1322 if self._match(TokenType.COMMAND): 1323 expression = self._parse_as_command(self._prev) 1324 else: 1325 begin = self._match(TokenType.BEGIN) 1326 return_ = self._match_text_seq("RETURN") 1327 1328 if self._match(TokenType.STRING, advance=False): 1329 # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property 1330 # # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement 1331 expression = self._parse_string() 1332 extend_props(self._parse_properties()) 1333 else: 1334 expression = self._parse_statement() 1335 1336 end = self._match_text_seq("END") 1337 1338 if return_: 1339 expression = self.expression(exp.Return, this=expression) 1340 elif create_token.token_type == TokenType.INDEX: 1341 this = self._parse_index(index=self._parse_id_var()) 1342 elif create_token.token_type in self.DB_CREATABLES: 1343 table_parts = self._parse_table_parts(schema=True) 1344 1345 # exp.Properties.Location.POST_NAME 1346 self._match(TokenType.COMMA) 1347 extend_props(self._parse_properties(before=True)) 1348 1349 this = self._parse_schema(this=table_parts) 1350 1351 # exp.Properties.Location.POST_SCHEMA and POST_WITH 1352 extend_props(self._parse_properties()) 1353 1354 self._match(TokenType.ALIAS) 1355 if not self._match_set(self.DDL_SELECT_TOKENS, advance=False): 1356 # exp.Properties.Location.POST_ALIAS 1357 extend_props(self._parse_properties()) 1358 1359 expression = self._parse_ddl_select() 1360 1361 if create_token.token_type == TokenType.TABLE: 1362 # exp.Properties.Location.POST_EXPRESSION 1363 extend_props(self._parse_properties()) 1364 1365 indexes = [] 1366 while True: 1367 index = self._parse_index() 1368 1369 # exp.Properties.Location.POST_INDEX 1370 extend_props(self._parse_properties()) 1371 1372 if not index: 1373 break 1374 else: 1375 self._match(TokenType.COMMA) 1376 indexes.append(index) 1377 elif create_token.token_type == TokenType.VIEW: 1378 if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"): 1379 no_schema_binding = True 1380 1381 shallow = self._match_text_seq("SHALLOW") 1382 1383 if self._match_texts(self.CLONE_KEYWORDS): 1384 copy = self._prev.text.lower() == "copy" 1385 clone = self._parse_table(schema=True) 1386 when = self._match_texts({"AT", "BEFORE"}) and self._prev.text.upper() 1387 clone_kind = ( 1388 self._match(TokenType.L_PAREN) 1389 and self._match_texts(self.CLONE_KINDS) 1390 and self._prev.text.upper() 1391 ) 1392 clone_expression = self._match(TokenType.FARROW) and self._parse_bitwise() 1393 self._match(TokenType.R_PAREN) 1394 clone = self.expression( 1395 exp.Clone, 1396 this=clone, 1397 when=when, 1398 kind=clone_kind, 1399 shallow=shallow, 1400 expression=clone_expression, 1401 copy=copy, 1402 ) 1403 1404 return self.expression( 1405 exp.Create, 1406 comments=comments, 1407 this=this, 1408 kind=create_token.text, 1409 replace=replace, 1410 unique=unique, 1411 expression=expression, 1412 exists=exists, 1413 properties=properties, 1414 indexes=indexes, 1415 no_schema_binding=no_schema_binding, 1416 begin=begin, 1417 end=end, 1418 clone=clone, 1419 ) 1420 1421 def _parse_property_before(self) -> t.Optional[exp.Expression]: 1422 # only used for teradata currently 1423 self._match(TokenType.COMMA) 1424 
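
    # Example (added for illustration; not part of the original source):
    # _parse_create handles both plain DDL and CREATE ... AS SELECT; the SELECT
    # part lands in the Create node's "expression" arg:
    #
    #     import sqlglot
    #     from sqlglot import exp
    #
    #     ast = sqlglot.parse_one("CREATE TABLE t AS SELECT 1 AS x")
    #     assert isinstance(ast, exp.Create)
    #     assert isinstance(ast.expression, exp.Select)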

    def _parse_property_before(self) -> t.Optional[exp.Expression]:
        # only used for teradata currently
        self._match(TokenType.COMMA)

        kwargs = {
            "no": self._match_text_seq("NO"),
            "dual": self._match_text_seq("DUAL"),
            "before": self._match_text_seq("BEFORE"),
            "default": self._match_text_seq("DEFAULT"),
            "local": (self._match_text_seq("LOCAL") and "LOCAL")
            or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"),
            "after": self._match_text_seq("AFTER"),
            "minimum": self._match_texts(("MIN", "MINIMUM")),
            "maximum": self._match_texts(("MAX", "MAXIMUM")),
        }

        if self._match_texts(self.PROPERTY_PARSERS):
            parser = self.PROPERTY_PARSERS[self._prev.text.upper()]
            try:
                return parser(self, **{k: v for k, v in kwargs.items() if v})
            except TypeError:
                self.raise_error(f"Cannot parse property '{self._prev.text}'")

        return None

    def _parse_property(self) -> t.Optional[exp.Expression]:
        if self._match_texts(self.PROPERTY_PARSERS):
            return self.PROPERTY_PARSERS[self._prev.text.upper()](self)

        if self._match(TokenType.DEFAULT) and self._match_texts(self.PROPERTY_PARSERS):
            return self.PROPERTY_PARSERS[self._prev.text.upper()](self, default=True)

        if self._match_text_seq("COMPOUND", "SORTKEY"):
            return self._parse_sortkey(compound=True)

        if self._match_text_seq("SQL", "SECURITY"):
            return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER"))

        index = self._index
        key = self._parse_column()

        if not self._match(TokenType.EQ):
            self._retreat(index)
            return None

        return self.expression(
            exp.Property,
            this=key.to_dot() if isinstance(key, exp.Column) else key,
            value=self._parse_column() or self._parse_var(any_token=True),
        )

    def _parse_stored(self) -> exp.FileFormatProperty:
        self._match(TokenType.ALIAS)

        input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None
        output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None

        return self.expression(
            exp.FileFormatProperty,
            this=self.expression(
                exp.InputOutputFormat, input_format=input_format, output_format=output_format
            )
            if input_format or output_format
            else self._parse_var_or_string() or self._parse_number() or self._parse_id_var(),
        )

    def _parse_property_assignment(self, exp_class: t.Type[E], **kwargs: t.Any) -> E:
        self._match(TokenType.EQ)
        self._match(TokenType.ALIAS)
        return self.expression(exp_class, this=self._parse_field(), **kwargs)

    def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]:
        properties = []
        while True:
            if before:
                prop = self._parse_property_before()
            else:
                prop = self._parse_property()

            if not prop:
                break
            for p in ensure_list(prop):
                properties.append(p)

        if properties:
            return self.expression(exp.Properties, expressions=properties)

        return None

    def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty:
        return self.expression(
            exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION")
        )

    def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty:
        if self._index >= 2:
            pre_volatile_token = self._tokens[self._index - 2]
        else:
            pre_volatile_token = None

        if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS:
            return exp.VolatileProperty()

        return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE"))

    def _parse_with_property(
        self,
    ) -> t.Optional[exp.Expression] | t.List[exp.Expression]:
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_property)

        if self._match_text_seq("JOURNAL"):
            return self._parse_withjournaltable()

        if self._match_text_seq("DATA"):
            return self._parse_withdata(no=False)
        elif self._match_text_seq("NO", "DATA"):
            return self._parse_withdata(no=True)

        if not self._next:
            return None

        return self._parse_withisolatedloading()

    # https://dev.mysql.com/doc/refman/8.0/en/create-view.html
    def _parse_definer(self) -> t.Optional[exp.DefinerProperty]:
        self._match(TokenType.EQ)

        user = self._parse_id_var()
        self._match(TokenType.PARAMETER)
        host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text)

        if not user or not host:
            return None

        return exp.DefinerProperty(this=f"{user}@{host}")

    def _parse_withjournaltable(self) -> exp.WithJournalTableProperty:
        self._match(TokenType.TABLE)
        self._match(TokenType.EQ)
        return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts())

    def _parse_log(self, no: bool = False) -> exp.LogProperty:
        return self.expression(exp.LogProperty, no=no)

    def _parse_journal(self, **kwargs) -> exp.JournalProperty:
        return self.expression(exp.JournalProperty, **kwargs)

    def _parse_checksum(self) -> exp.ChecksumProperty:
        self._match(TokenType.EQ)

        on = None
        if self._match(TokenType.ON):
            on = True
        elif self._match_text_seq("OFF"):
            on = False

        return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT))

    def _parse_cluster(self) -> exp.Cluster:
        return self.expression(exp.Cluster, expressions=self._parse_csv(self._parse_ordered))

    def _parse_clustered_by(self) -> exp.ClusteredByProperty:
        self._match_text_seq("BY")

        self._match_l_paren()
        expressions = self._parse_csv(self._parse_column)
        self._match_r_paren()

        if self._match_text_seq("SORTED", "BY"):
            self._match_l_paren()
            sorted_by = self._parse_csv(self._parse_ordered)
            self._match_r_paren()
        else:
            sorted_by = None

        self._match(TokenType.INTO)
        buckets = self._parse_number()
        self._match_text_seq("BUCKETS")

        return self.expression(
            exp.ClusteredByProperty,
            expressions=expressions,
            sorted_by=sorted_by,
            buckets=buckets,
        )

    def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]:
        if not self._match_text_seq("GRANTS"):
            self._retreat(self._index - 1)
            return None

        return self.expression(exp.CopyGrantsProperty)

    def _parse_freespace(self) -> exp.FreespaceProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT)
        )

    def _parse_mergeblockratio(
        self, no: bool = False, default: bool = False
    ) -> exp.MergeBlockRatioProperty:
        if self._match(TokenType.EQ):
            return self.expression(
                exp.MergeBlockRatioProperty,
                this=self._parse_number(),
                percent=self._match(TokenType.PERCENT),
            )

        return self.expression(exp.MergeBlockRatioProperty, no=no, default=default)
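
    # Example (added for illustration; not part of the original source): the
    # property helpers above are dispatched from PROPERTY_PARSERS; e.g. an ENGINE
    # assignment becomes an EngineProperty attached to the Create node:
    #
    #     import sqlglot
    #     from sqlglot import exp
    #
    #     ast = sqlglot.parse_one("CREATE TABLE t (x Int32) ENGINE=MergeTree", read="clickhouse")
    #     assert ast.find(exp.EngineProperty) is not None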

    def _parse_datablocksize(
        self,
        default: t.Optional[bool] = None,
        minimum: t.Optional[bool] = None,
        maximum: t.Optional[bool] = None,
    ) -> exp.DataBlocksizeProperty:
        self._match(TokenType.EQ)
        size = self._parse_number()

        units = None
        if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")):
            units = self._prev.text

        return self.expression(
            exp.DataBlocksizeProperty,
            size=size,
            units=units,
            default=default,
            minimum=minimum,
            maximum=maximum,
        )

    def _parse_blockcompression(self) -> exp.BlockCompressionProperty:
        self._match(TokenType.EQ)
        always = self._match_text_seq("ALWAYS")
        manual = self._match_text_seq("MANUAL")
        never = self._match_text_seq("NEVER")
        default = self._match_text_seq("DEFAULT")

        autotemp = None
        if self._match_text_seq("AUTOTEMP"):
            autotemp = self._parse_schema()

        return self.expression(
            exp.BlockCompressionProperty,
            always=always,
            manual=manual,
            never=never,
            default=default,
            autotemp=autotemp,
        )

    def _parse_withisolatedloading(self) -> exp.IsolatedLoadingProperty:
        no = self._match_text_seq("NO")
        concurrent = self._match_text_seq("CONCURRENT")
        self._match_text_seq("ISOLATED", "LOADING")
        for_all = self._match_text_seq("FOR", "ALL")
        for_insert = self._match_text_seq("FOR", "INSERT")
        for_none = self._match_text_seq("FOR", "NONE")
        return self.expression(
            exp.IsolatedLoadingProperty,
            no=no,
            concurrent=concurrent,
            for_all=for_all,
            for_insert=for_insert,
            for_none=for_none,
        )

    def _parse_locking(self) -> exp.LockingProperty:
        if self._match(TokenType.TABLE):
            kind = "TABLE"
        elif self._match(TokenType.VIEW):
            kind = "VIEW"
        elif self._match(TokenType.ROW):
            kind = "ROW"
        elif self._match_text_seq("DATABASE"):
            kind = "DATABASE"
        else:
            kind = None

        if kind in ("DATABASE", "TABLE", "VIEW"):
            this = self._parse_table_parts()
        else:
            this = None

        if self._match(TokenType.FOR):
            for_or_in = "FOR"
        elif self._match(TokenType.IN):
            for_or_in = "IN"
        else:
            for_or_in = None

        if self._match_text_seq("ACCESS"):
            lock_type = "ACCESS"
        elif self._match_texts(("EXCL", "EXCLUSIVE")):
            lock_type = "EXCLUSIVE"
        elif self._match_text_seq("SHARE"):
            lock_type = "SHARE"
        elif self._match_text_seq("READ"):
            lock_type = "READ"
        elif self._match_text_seq("WRITE"):
            lock_type = "WRITE"
        elif self._match_text_seq("CHECKSUM"):
            lock_type = "CHECKSUM"
        else:
            lock_type = None

        override = self._match_text_seq("OVERRIDE")

        return self.expression(
            exp.LockingProperty,
            this=this,
            kind=kind,
            for_or_in=for_or_in,
            lock_type=lock_type,
            override=override,
        )

    def _parse_partition_by(self) -> t.List[exp.Expression]:
        if self._match(TokenType.PARTITION_BY):
            return self._parse_csv(self._parse_conjunction)
        return []

    def _parse_partitioned_by(self) -> exp.PartitionedByProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.PartitionedByProperty,
            this=self._parse_schema() or self._parse_bracket(self._parse_field()),
        )

    def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty:
        if self._match_text_seq("AND", "STATISTICS"):
            statistics = True
        elif self._match_text_seq("AND", "NO", "STATISTICS"):
            statistics = False
        else:
            statistics = None

        return self.expression(exp.WithDataProperty, no=no, statistics=statistics)

    def _parse_no_property(self) -> t.Optional[exp.NoPrimaryIndexProperty]:
        if self._match_text_seq("PRIMARY", "INDEX"):
            return exp.NoPrimaryIndexProperty()
        return None

    def _parse_on_property(self) -> t.Optional[exp.Expression]:
        if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"):
            return exp.OnCommitProperty()
        if self._match_text_seq("COMMIT", "DELETE", "ROWS"):
            return exp.OnCommitProperty(delete=True)
        return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var()))

    def _parse_distkey(self) -> exp.DistKeyProperty:
        return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var))

    def _parse_create_like(self) -> t.Optional[exp.LikeProperty]:
        table = self._parse_table(schema=True)

        options = []
        while self._match_texts(("INCLUDING", "EXCLUDING")):
            this = self._prev.text.upper()

            id_var = self._parse_id_var()
            if not id_var:
                return None

            options.append(
                self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper()))
            )

        return self.expression(exp.LikeProperty, this=table, expressions=options)

    def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty:
        return self.expression(
            exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound
        )

    def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default
        )

    def _parse_remote_with_connection(self) -> exp.RemoteWithConnectionModelProperty:
        self._match_text_seq("WITH", "CONNECTION")
        return self.expression(
            exp.RemoteWithConnectionModelProperty, this=self._parse_table_parts()
        )

    def _parse_returns(self) -> exp.ReturnsProperty:
        value: t.Optional[exp.Expression]
        is_table = self._match(TokenType.TABLE)

        if is_table:
            if self._match(TokenType.LT):
                value = self.expression(
                    exp.Schema,
                    this="TABLE",
                    expressions=self._parse_csv(self._parse_struct_types),
                )
                if not self._match(TokenType.GT):
                    self.raise_error("Expecting >")
            else:
                value = self._parse_schema(exp.var("TABLE"))
        else:
            value = self._parse_types()

        return self.expression(exp.ReturnsProperty, this=value, is_table=is_table)

    def _parse_describe(self) -> exp.Describe:
        kind = self._match_set(self.CREATABLES) and self._prev.text
        this = self._parse_table(schema=True)
        properties = self._parse_properties()
        expressions = properties.expressions if properties else None
        return self.expression(exp.Describe, this=this, kind=kind, expressions=expressions)
row_format=self._parse_row_format(match_row=True), 1852 ) 1853 else: 1854 if self._match(TokenType.OR): 1855 alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text 1856 1857 self._match(TokenType.INTO) 1858 comments += ensure_list(self._prev_comments) 1859 self._match(TokenType.TABLE) 1860 this = self._parse_table(schema=True) 1861 1862 returning = self._parse_returning() 1863 1864 return self.expression( 1865 exp.Insert, 1866 comments=comments, 1867 this=this, 1868 by_name=self._match_text_seq("BY", "NAME"), 1869 exists=self._parse_exists(), 1870 partition=self._parse_partition(), 1871 where=self._match_pair(TokenType.REPLACE, TokenType.WHERE) 1872 and self._parse_conjunction(), 1873 expression=self._parse_ddl_select(), 1874 conflict=self._parse_on_conflict(), 1875 returning=returning or self._parse_returning(), 1876 overwrite=overwrite, 1877 alternative=alternative, 1878 ignore=ignore, 1879 ) 1880 1881 def _parse_kill(self) -> exp.Kill: 1882 kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None 1883 1884 return self.expression( 1885 exp.Kill, 1886 this=self._parse_primary(), 1887 kind=kind, 1888 ) 1889 1890 def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]: 1891 conflict = self._match_text_seq("ON", "CONFLICT") 1892 duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY") 1893 1894 if not conflict and not duplicate: 1895 return None 1896 1897 nothing = None 1898 expressions = None 1899 key = None 1900 constraint = None 1901 1902 if conflict: 1903 if self._match_text_seq("ON", "CONSTRAINT"): 1904 constraint = self._parse_id_var() 1905 else: 1906 key = self._parse_csv(self._parse_value) 1907 1908 self._match_text_seq("DO") 1909 if self._match_text_seq("NOTHING"): 1910 nothing = True 1911 else: 1912 self._match(TokenType.UPDATE) 1913 self._match(TokenType.SET) 1914 expressions = self._parse_csv(self._parse_equality) 1915 1916 return self.expression( 1917 exp.OnConflict, 1918 duplicate=duplicate, 1919 expressions=expressions, 1920 nothing=nothing, 1921 key=key, 1922 constraint=constraint, 1923 ) 1924 1925 def _parse_returning(self) -> t.Optional[exp.Returning]: 1926 if not self._match(TokenType.RETURNING): 1927 return None 1928 return self.expression( 1929 exp.Returning, 1930 expressions=self._parse_csv(self._parse_expression), 1931 into=self._match(TokenType.INTO) and self._parse_table_part(), 1932 ) 1933 1934 def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 1935 if not self._match(TokenType.FORMAT): 1936 return None 1937 return self._parse_row_format() 1938 1939 def _parse_row_format( 1940 self, match_row: bool = False 1941 ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 1942 if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT): 1943 return None 1944 1945 if self._match_text_seq("SERDE"): 1946 this = self._parse_string() 1947 1948 serde_properties = None 1949 if self._match(TokenType.SERDE_PROPERTIES): 1950 serde_properties = self.expression( 1951 exp.SerdeProperties, expressions=self._parse_wrapped_csv(self._parse_property) 1952 ) 1953 1954 return self.expression( 1955 exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties 1956 ) 1957 1958 self._match_text_seq("DELIMITED") 1959 1960 kwargs = {} 1961 1962 if self._match_text_seq("FIELDS", "TERMINATED", "BY"): 1963 kwargs["fields"] = self._parse_string() 1964 if self._match_text_seq("ESCAPED", "BY"): 1965 kwargs["escaped"] = self._parse_string() 1966 if 
self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"): 1967 kwargs["collection_items"] = self._parse_string() 1968 if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"): 1969 kwargs["map_keys"] = self._parse_string() 1970 if self._match_text_seq("LINES", "TERMINATED", "BY"): 1971 kwargs["lines"] = self._parse_string() 1972 if self._match_text_seq("NULL", "DEFINED", "AS"): 1973 kwargs["null"] = self._parse_string() 1974 1975 return self.expression(exp.RowFormatDelimitedProperty, **kwargs) # type: ignore 1976 1977 def _parse_load(self) -> exp.LoadData | exp.Command: 1978 if self._match_text_seq("DATA"): 1979 local = self._match_text_seq("LOCAL") 1980 self._match_text_seq("INPATH") 1981 inpath = self._parse_string() 1982 overwrite = self._match(TokenType.OVERWRITE) 1983 self._match_pair(TokenType.INTO, TokenType.TABLE) 1984 1985 return self.expression( 1986 exp.LoadData, 1987 this=self._parse_table(schema=True), 1988 local=local, 1989 overwrite=overwrite, 1990 inpath=inpath, 1991 partition=self._parse_partition(), 1992 input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(), 1993 serde=self._match_text_seq("SERDE") and self._parse_string(), 1994 ) 1995 return self._parse_as_command(self._prev) 1996 1997 def _parse_delete(self) -> exp.Delete: 1998 # This handles MySQL's "Multiple-Table Syntax" 1999 # https://dev.mysql.com/doc/refman/8.0/en/delete.html 2000 tables = None 2001 comments = self._prev_comments 2002 if not self._match(TokenType.FROM, advance=False): 2003 tables = self._parse_csv(self._parse_table) or None 2004 2005 returning = self._parse_returning() 2006 2007 return self.expression( 2008 exp.Delete, 2009 comments=comments, 2010 tables=tables, 2011 this=self._match(TokenType.FROM) and self._parse_table(joins=True), 2012 using=self._match(TokenType.USING) and self._parse_table(joins=True), 2013 where=self._parse_where(), 2014 returning=returning or self._parse_returning(), 2015 limit=self._parse_limit(), 2016 ) 2017 2018 def _parse_update(self) -> exp.Update: 2019 comments = self._prev_comments 2020 this = self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS) 2021 expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality) 2022 returning = self._parse_returning() 2023 return self.expression( 2024 exp.Update, 2025 comments=comments, 2026 **{ # type: ignore 2027 "this": this, 2028 "expressions": expressions, 2029 "from": self._parse_from(joins=True), 2030 "where": self._parse_where(), 2031 "returning": returning or self._parse_returning(), 2032 "order": self._parse_order(), 2033 "limit": self._parse_limit(), 2034 }, 2035 ) 2036 2037 def _parse_uncache(self) -> exp.Uncache: 2038 if not self._match(TokenType.TABLE): 2039 self.raise_error("Expecting TABLE after UNCACHE") 2040 2041 return self.expression( 2042 exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True) 2043 ) 2044 2045 def _parse_cache(self) -> exp.Cache: 2046 lazy = self._match_text_seq("LAZY") 2047 self._match(TokenType.TABLE) 2048 table = self._parse_table(schema=True) 2049 2050 options = [] 2051 if self._match_text_seq("OPTIONS"): 2052 self._match_l_paren() 2053 k = self._parse_string() 2054 self._match(TokenType.EQ) 2055 v = self._parse_string() 2056 options = [k, v] 2057 self._match_r_paren() 2058 2059 self._match(TokenType.ALIAS) 2060 return self.expression( 2061 exp.Cache, 2062 this=table, 2063 lazy=lazy, 2064 options=options, 2065 expression=self._parse_select(nested=True), 2066 ) 2067 2068 def _parse_partition(self) -> 
t.Optional[exp.Partition]: 2069 if not self._match(TokenType.PARTITION): 2070 return None 2071 2072 return self.expression( 2073 exp.Partition, expressions=self._parse_wrapped_csv(self._parse_conjunction) 2074 ) 2075 2076 def _parse_value(self) -> exp.Tuple: 2077 if self._match(TokenType.L_PAREN): 2078 expressions = self._parse_csv(self._parse_conjunction) 2079 self._match_r_paren() 2080 return self.expression(exp.Tuple, expressions=expressions) 2081 2082 # In presto we can have VALUES 1, 2 which results in 1 column & 2 rows. 2083 # https://prestodb.io/docs/current/sql/values.html 2084 return self.expression(exp.Tuple, expressions=[self._parse_conjunction()]) 2085 2086 def _parse_projections(self) -> t.List[exp.Expression]: 2087 return self._parse_expressions() 2088 2089 def _parse_select( 2090 self, nested: bool = False, table: bool = False, parse_subquery_alias: bool = True 2091 ) -> t.Optional[exp.Expression]: 2092 cte = self._parse_with() 2093 2094 if cte: 2095 this = self._parse_statement() 2096 2097 if not this: 2098 self.raise_error("Failed to parse any statement following CTE") 2099 return cte 2100 2101 if "with" in this.arg_types: 2102 this.set("with", cte) 2103 else: 2104 self.raise_error(f"{this.key} does not support CTE") 2105 this = cte 2106 2107 return this 2108 2109 # duckdb supports leading with FROM x 2110 from_ = self._parse_from() if self._match(TokenType.FROM, advance=False) else None 2111 2112 if self._match(TokenType.SELECT): 2113 comments = self._prev_comments 2114 2115 hint = self._parse_hint() 2116 all_ = self._match(TokenType.ALL) 2117 distinct = self._match_set(self.DISTINCT_TOKENS) 2118 2119 kind = ( 2120 self._match(TokenType.ALIAS) 2121 and self._match_texts(("STRUCT", "VALUE")) 2122 and self._prev.text 2123 ) 2124 2125 if distinct: 2126 distinct = self.expression( 2127 exp.Distinct, 2128 on=self._parse_value() if self._match(TokenType.ON) else None, 2129 ) 2130 2131 if all_ and distinct: 2132 self.raise_error("Cannot specify both ALL and DISTINCT after SELECT") 2133 2134 limit = self._parse_limit(top=True) 2135 projections = self._parse_projections() 2136 2137 this = self.expression( 2138 exp.Select, 2139 kind=kind, 2140 hint=hint, 2141 distinct=distinct, 2142 expressions=projections, 2143 limit=limit, 2144 ) 2145 this.comments = comments 2146 2147 into = self._parse_into() 2148 if into: 2149 this.set("into", into) 2150 2151 if not from_: 2152 from_ = self._parse_from() 2153 2154 if from_: 2155 this.set("from", from_) 2156 2157 this = self._parse_query_modifiers(this) 2158 elif (table or nested) and self._match(TokenType.L_PAREN): 2159 if self._match(TokenType.PIVOT): 2160 this = self._parse_simplified_pivot() 2161 elif self._match(TokenType.FROM): 2162 this = exp.select("*").from_( 2163 t.cast(exp.From, self._parse_from(skip_from_token=True)) 2164 ) 2165 else: 2166 this = self._parse_table() if table else self._parse_select(nested=True) 2167 this = self._parse_set_operations(self._parse_query_modifiers(this)) 2168 2169 self._match_r_paren() 2170 2171 # We return early here so that the UNION isn't attached to the subquery by the 2172 # following call to _parse_set_operations, but instead becomes the parent node 2173 return self._parse_subquery(this, parse_alias=parse_subquery_alias) 2174 elif self._match(TokenType.VALUES): 2175 this = self.expression( 2176 exp.Values, 2177 expressions=self._parse_csv(self._parse_value), 2178 alias=self._parse_table_alias(), 2179 ) 2180 elif from_: 2181 this = exp.select("*").from_(from_.this, copy=False) 2182 else: 2183 this 
= None 2184 2185 return self._parse_set_operations(this) 2186 2187 def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]: 2188 if not skip_with_token and not self._match(TokenType.WITH): 2189 return None 2190 2191 comments = self._prev_comments 2192 recursive = self._match(TokenType.RECURSIVE) 2193 2194 expressions = [] 2195 while True: 2196 expressions.append(self._parse_cte()) 2197 2198 if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH): 2199 break 2200 else: 2201 self._match(TokenType.WITH) 2202 2203 return self.expression( 2204 exp.With, comments=comments, expressions=expressions, recursive=recursive 2205 ) 2206 2207 def _parse_cte(self) -> exp.CTE: 2208 alias = self._parse_table_alias() 2209 if not alias or not alias.this: 2210 self.raise_error("Expected CTE to have alias") 2211 2212 self._match(TokenType.ALIAS) 2213 return self.expression( 2214 exp.CTE, this=self._parse_wrapped(self._parse_statement), alias=alias 2215 ) 2216 2217 def _parse_table_alias( 2218 self, alias_tokens: t.Optional[t.Collection[TokenType]] = None 2219 ) -> t.Optional[exp.TableAlias]: 2220 any_token = self._match(TokenType.ALIAS) 2221 alias = ( 2222 self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 2223 or self._parse_string_as_identifier() 2224 ) 2225 2226 index = self._index 2227 if self._match(TokenType.L_PAREN): 2228 columns = self._parse_csv(self._parse_function_parameter) 2229 self._match_r_paren() if columns else self._retreat(index) 2230 else: 2231 columns = None 2232 2233 if not alias and not columns: 2234 return None 2235 2236 return self.expression(exp.TableAlias, this=alias, columns=columns) 2237 2238 def _parse_subquery( 2239 self, this: t.Optional[exp.Expression], parse_alias: bool = True 2240 ) -> t.Optional[exp.Subquery]: 2241 if not this: 2242 return None 2243 2244 return self.expression( 2245 exp.Subquery, 2246 this=this, 2247 pivots=self._parse_pivots(), 2248 alias=self._parse_table_alias() if parse_alias else None, 2249 ) 2250 2251 def _parse_query_modifiers( 2252 self, this: t.Optional[exp.Expression] 2253 ) -> t.Optional[exp.Expression]: 2254 if isinstance(this, self.MODIFIABLES): 2255 for join in iter(self._parse_join, None): 2256 this.append("joins", join) 2257 for lateral in iter(self._parse_lateral, None): 2258 this.append("laterals", lateral) 2259 2260 while True: 2261 if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False): 2262 parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type] 2263 key, expression = parser(self) 2264 2265 if expression: 2266 this.set(key, expression) 2267 if key == "limit": 2268 offset = expression.args.pop("offset", None) 2269 if offset: 2270 this.set("offset", exp.Offset(expression=offset)) 2271 continue 2272 break 2273 return this 2274 2275 def _parse_hint(self) -> t.Optional[exp.Hint]: 2276 if self._match(TokenType.HINT): 2277 hints = [] 2278 for hint in iter(lambda: self._parse_csv(self._parse_function), []): 2279 hints.extend(hint) 2280 2281 if not self._match_pair(TokenType.STAR, TokenType.SLASH): 2282 self.raise_error("Expected */ after HINT") 2283 2284 return self.expression(exp.Hint, expressions=hints) 2285 2286 return None 2287 2288 def _parse_into(self) -> t.Optional[exp.Into]: 2289 if not self._match(TokenType.INTO): 2290 return None 2291 2292 temp = self._match(TokenType.TEMPORARY) 2293 unlogged = self._match_text_seq("UNLOGGED") 2294 self._match(TokenType.TABLE) 2295 2296 return self.expression( 2297 exp.Into, this=self._parse_table(schema=True), 
temporary=temp, unlogged=unlogged 2298 ) 2299 2300 def _parse_from( 2301 self, joins: bool = False, skip_from_token: bool = False 2302 ) -> t.Optional[exp.From]: 2303 if not skip_from_token and not self._match(TokenType.FROM): 2304 return None 2305 2306 return self.expression( 2307 exp.From, comments=self._prev_comments, this=self._parse_table(joins=joins) 2308 ) 2309 2310 def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]: 2311 if not self._match(TokenType.MATCH_RECOGNIZE): 2312 return None 2313 2314 self._match_l_paren() 2315 2316 partition = self._parse_partition_by() 2317 order = self._parse_order() 2318 measures = self._parse_expressions() if self._match_text_seq("MEASURES") else None 2319 2320 if self._match_text_seq("ONE", "ROW", "PER", "MATCH"): 2321 rows = exp.var("ONE ROW PER MATCH") 2322 elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"): 2323 text = "ALL ROWS PER MATCH" 2324 if self._match_text_seq("SHOW", "EMPTY", "MATCHES"): 2325 text += " SHOW EMPTY MATCHES" 2326 elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"): 2327 text += " OMIT EMPTY MATCHES" 2328 elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"): 2329 text += " WITH UNMATCHED ROWS" 2330 rows = exp.var(text) 2331 else: 2332 rows = None 2333 2334 if self._match_text_seq("AFTER", "MATCH", "SKIP"): 2335 text = "AFTER MATCH SKIP" 2336 if self._match_text_seq("PAST", "LAST", "ROW"): 2337 text += " PAST LAST ROW" 2338 elif self._match_text_seq("TO", "NEXT", "ROW"): 2339 text += " TO NEXT ROW" 2340 elif self._match_text_seq("TO", "FIRST"): 2341 text += f" TO FIRST {self._advance_any().text}" # type: ignore 2342 elif self._match_text_seq("TO", "LAST"): 2343 text += f" TO LAST {self._advance_any().text}" # type: ignore 2344 after = exp.var(text) 2345 else: 2346 after = None 2347 2348 if self._match_text_seq("PATTERN"): 2349 self._match_l_paren() 2350 2351 if not self._curr: 2352 self.raise_error("Expecting )", self._curr) 2353 2354 paren = 1 2355 start = self._curr 2356 2357 while self._curr and paren > 0: 2358 if self._curr.token_type == TokenType.L_PAREN: 2359 paren += 1 2360 if self._curr.token_type == TokenType.R_PAREN: 2361 paren -= 1 2362 2363 end = self._prev 2364 self._advance() 2365 2366 if paren > 0: 2367 self.raise_error("Expecting )", self._curr) 2368 2369 pattern = exp.var(self._find_sql(start, end)) 2370 else: 2371 pattern = None 2372 2373 define = ( 2374 self._parse_csv( 2375 lambda: self.expression( 2376 exp.Alias, 2377 alias=self._parse_id_var(any_token=True), 2378 this=self._match(TokenType.ALIAS) and self._parse_conjunction(), 2379 ) 2380 ) 2381 if self._match_text_seq("DEFINE") 2382 else None 2383 ) 2384 2385 self._match_r_paren() 2386 2387 return self.expression( 2388 exp.MatchRecognize, 2389 partition_by=partition, 2390 order=order, 2391 measures=measures, 2392 rows=rows, 2393 after=after, 2394 pattern=pattern, 2395 define=define, 2396 alias=self._parse_table_alias(), 2397 ) 2398 2399 def _parse_lateral(self) -> t.Optional[exp.Lateral]: 2400 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY) 2401 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY) 2402 2403 if outer_apply or cross_apply: 2404 this = self._parse_select(table=True) 2405 view = None 2406 outer = not cross_apply 2407 elif self._match(TokenType.LATERAL): 2408 this = self._parse_select(table=True) 2409 view = self._match(TokenType.VIEW) 2410 outer = self._match(TokenType.OUTER) 2411 else: 2412 return None 2413 2414 if not this: 2415 this = ( 2416 self._parse_unnest() 2417 or
self._parse_function() 2418 or self._parse_id_var(any_token=False) 2419 ) 2420 2421 while self._match(TokenType.DOT): 2422 this = exp.Dot( 2423 this=this, 2424 expression=self._parse_function() or self._parse_id_var(any_token=False), 2425 ) 2426 2427 if view: 2428 table = self._parse_id_var(any_token=False) 2429 columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else [] 2430 table_alias: t.Optional[exp.TableAlias] = self.expression( 2431 exp.TableAlias, this=table, columns=columns 2432 ) 2433 elif isinstance(this, (exp.Subquery, exp.Unnest)) and this.alias: 2434 # We move the alias from the lateral's child node to the lateral itself 2435 table_alias = this.args["alias"].pop() 2436 else: 2437 table_alias = self._parse_table_alias() 2438 2439 return self.expression(exp.Lateral, this=this, view=view, outer=outer, alias=table_alias) 2440 2441 def _parse_join_parts( 2442 self, 2443 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 2444 return ( 2445 self._match_set(self.JOIN_METHODS) and self._prev, 2446 self._match_set(self.JOIN_SIDES) and self._prev, 2447 self._match_set(self.JOIN_KINDS) and self._prev, 2448 ) 2449 2450 def _parse_join( 2451 self, skip_join_token: bool = False, parse_bracket: bool = False 2452 ) -> t.Optional[exp.Join]: 2453 if self._match(TokenType.COMMA): 2454 return self.expression(exp.Join, this=self._parse_table()) 2455 2456 index = self._index 2457 method, side, kind = self._parse_join_parts() 2458 hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None 2459 join = self._match(TokenType.JOIN) 2460 2461 if not skip_join_token and not join: 2462 self._retreat(index) 2463 kind = None 2464 method = None 2465 side = None 2466 2467 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False) 2468 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False) 2469 2470 if not skip_join_token and not join and not outer_apply and not cross_apply: 2471 return None 2472 2473 if outer_apply: 2474 side = Token(TokenType.LEFT, "LEFT") 2475 2476 kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)} 2477 2478 if method: 2479 kwargs["method"] = method.text 2480 if side: 2481 kwargs["side"] = side.text 2482 if kind: 2483 kwargs["kind"] = kind.text 2484 if hint: 2485 kwargs["hint"] = hint 2486 2487 if self._match(TokenType.ON): 2488 kwargs["on"] = self._parse_conjunction() 2489 elif self._match(TokenType.USING): 2490 kwargs["using"] = self._parse_wrapped_id_vars() 2491 elif not (kind and kind.token_type == TokenType.CROSS): 2492 index = self._index 2493 join = self._parse_join() 2494 2495 if join and self._match(TokenType.ON): 2496 kwargs["on"] = self._parse_conjunction() 2497 elif join and self._match(TokenType.USING): 2498 kwargs["using"] = self._parse_wrapped_id_vars() 2499 else: 2500 join = None 2501 self._retreat(index) 2502 2503 kwargs["this"].set("joins", [join] if join else None) 2504 2505 comments = [c for token in (method, side, kind) if token for c in token.comments] 2506 return self.expression(exp.Join, comments=comments, **kwargs) 2507 2508 def _parse_opclass(self) -> t.Optional[exp.Expression]: 2509 this = self._parse_conjunction() 2510 if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False): 2511 return this 2512 2513 opclass = self._parse_var(any_token=True) 2514 if opclass: 2515 return self.expression(exp.Opclass, this=this, expression=opclass) 2516 2517 return this 2518 2519 def _parse_index( 2520 self, 2521 index: t.Optional[exp.Expression] = 
None, 2522 ) -> t.Optional[exp.Index]: 2523 if index: 2524 unique = None 2525 primary = None 2526 amp = None 2527 2528 self._match(TokenType.ON) 2529 self._match(TokenType.TABLE) # hive 2530 table = self._parse_table_parts(schema=True) 2531 else: 2532 unique = self._match(TokenType.UNIQUE) 2533 primary = self._match_text_seq("PRIMARY") 2534 amp = self._match_text_seq("AMP") 2535 2536 if not self._match(TokenType.INDEX): 2537 return None 2538 2539 index = self._parse_id_var() 2540 table = None 2541 2542 using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None 2543 2544 if self._match(TokenType.L_PAREN, advance=False): 2545 columns = self._parse_wrapped_csv(lambda: self._parse_ordered(self._parse_opclass)) 2546 else: 2547 columns = None 2548 2549 return self.expression( 2550 exp.Index, 2551 this=index, 2552 table=table, 2553 using=using, 2554 columns=columns, 2555 unique=unique, 2556 primary=primary, 2557 amp=amp, 2558 partition_by=self._parse_partition_by(), 2559 where=self._parse_where(), 2560 ) 2561 2562 def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]: 2563 hints: t.List[exp.Expression] = [] 2564 if self._match_pair(TokenType.WITH, TokenType.L_PAREN): 2565 # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16 2566 hints.append( 2567 self.expression( 2568 exp.WithTableHint, 2569 expressions=self._parse_csv( 2570 lambda: self._parse_function() or self._parse_var(any_token=True) 2571 ), 2572 ) 2573 ) 2574 self._match_r_paren() 2575 else: 2576 # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html 2577 while self._match_set(self.TABLE_INDEX_HINT_TOKENS): 2578 hint = exp.IndexTableHint(this=self._prev.text.upper()) 2579 2580 self._match_texts({"INDEX", "KEY"}) 2581 if self._match(TokenType.FOR): 2582 hint.set("target", self._advance_any() and self._prev.text.upper()) 2583 2584 hint.set("expressions", self._parse_wrapped_id_vars()) 2585 hints.append(hint) 2586 2587 return hints or None 2588 2589 def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]: 2590 return ( 2591 (not schema and self._parse_function(optional_parens=False)) 2592 or self._parse_id_var(any_token=False) 2593 or self._parse_string_as_identifier() 2594 or self._parse_placeholder() 2595 ) 2596 2597 def _parse_table_parts(self, schema: bool = False) -> exp.Table: 2598 catalog = None 2599 db = None 2600 table = self._parse_table_part(schema=schema) 2601 2602 while self._match(TokenType.DOT): 2603 if catalog: 2604 # This allows nesting the table in arbitrarily many dot expressions if needed 2605 table = self.expression( 2606 exp.Dot, this=table, expression=self._parse_table_part(schema=schema) 2607 ) 2608 else: 2609 catalog = db 2610 db = table 2611 table = self._parse_table_part(schema=schema) 2612 2613 if not table: 2614 self.raise_error(f"Expected table name but got {self._curr}") 2615 2616 return self.expression( 2617 exp.Table, this=table, db=db, catalog=catalog, pivots=self._parse_pivots() 2618 ) 2619 2620 def _parse_table( 2621 self, 2622 schema: bool = False, 2623 joins: bool = False, 2624 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 2625 parse_bracket: bool = False, 2626 ) -> t.Optional[exp.Expression]: 2627 lateral = self._parse_lateral() 2628 if lateral: 2629 return lateral 2630 2631 unnest = self._parse_unnest() 2632 if unnest: 2633 return unnest 2634 2635 values = self._parse_derived_table_values() 2636 if values: 2637 return values 2638 2639 subquery = self._parse_select(table=True) 2640 
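# [Editor's note] The branches above give table references a fixed resolution order:
# LATERAL/APPLY first, then UNNEST, then derived VALUES, then a parenthesized SELECT;
# plain (possibly dotted) table names are only attempted below. A hedged sketch of
# the derived-VALUES branch via the public API:
#
#     >>> import sqlglot
#     >>> from sqlglot import exp
#     >>> ast = sqlglot.parse_one("SELECT * FROM (VALUES (1), (2)) AS t(a)")
#     >>> isinstance(ast.args["from"].this, exp.Values)
#     True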
if subquery: 2641 if not subquery.args.get("pivots"): 2642 subquery.set("pivots", self._parse_pivots()) 2643 return subquery 2644 2645 bracket = parse_bracket and self._parse_bracket(None) 2646 bracket = self.expression(exp.Table, this=bracket) if bracket else None 2647 this = t.cast( 2648 exp.Expression, bracket or self._parse_bracket(self._parse_table_parts(schema=schema)) 2649 ) 2650 2651 if schema: 2652 return self._parse_schema(this=this) 2653 2654 version = self._parse_version() 2655 2656 if version: 2657 this.set("version", version) 2658 2659 if self.ALIAS_POST_TABLESAMPLE: 2660 table_sample = self._parse_table_sample() 2661 2662 alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 2663 if alias: 2664 this.set("alias", alias) 2665 2666 if self._match_text_seq("AT"): 2667 this.set("index", self._parse_id_var()) 2668 2669 this.set("hints", self._parse_table_hints()) 2670 2671 if not this.args.get("pivots"): 2672 this.set("pivots", self._parse_pivots()) 2673 2674 if not self.ALIAS_POST_TABLESAMPLE: 2675 table_sample = self._parse_table_sample() 2676 2677 if table_sample: 2678 table_sample.set("this", this) 2679 this = table_sample 2680 2681 if joins: 2682 for join in iter(self._parse_join, None): 2683 this.append("joins", join) 2684 2685 return this 2686 2687 def _parse_version(self) -> t.Optional[exp.Version]: 2688 if self._match(TokenType.TIMESTAMP_SNAPSHOT): 2689 this = "TIMESTAMP" 2690 elif self._match(TokenType.VERSION_SNAPSHOT): 2691 this = "VERSION" 2692 else: 2693 return None 2694 2695 if self._match_set((TokenType.FROM, TokenType.BETWEEN)): 2696 kind = self._prev.text.upper() 2697 start = self._parse_bitwise() 2698 self._match_texts(("TO", "AND")) 2699 end = self._parse_bitwise() 2700 expression: t.Optional[exp.Expression] = self.expression( 2701 exp.Tuple, expressions=[start, end] 2702 ) 2703 elif self._match_text_seq("CONTAINED", "IN"): 2704 kind = "CONTAINED IN" 2705 expression = self.expression( 2706 exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise) 2707 ) 2708 elif self._match(TokenType.ALL): 2709 kind = "ALL" 2710 expression = None 2711 else: 2712 self._match_text_seq("AS", "OF") 2713 kind = "AS OF" 2714 expression = self._parse_type() 2715 2716 return self.expression(exp.Version, this=this, expression=expression, kind=kind) 2717 2718 def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]: 2719 if not self._match(TokenType.UNNEST): 2720 return None 2721 2722 expressions = self._parse_wrapped_csv(self._parse_type) 2723 offset = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 2724 2725 alias = self._parse_table_alias() if with_alias else None 2726 2727 if alias: 2728 if self.UNNEST_COLUMN_ONLY: 2729 if alias.args.get("columns"): 2730 self.raise_error("Unexpected extra column alias in unnest.") 2731 2732 alias.set("columns", [alias.this]) 2733 alias.set("this", None) 2734 2735 columns = alias.args.get("columns") or [] 2736 if offset and len(expressions) < len(columns): 2737 offset = columns.pop() 2738 2739 if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET): 2740 self._match(TokenType.ALIAS) 2741 offset = self._parse_id_var( 2742 any_token=False, tokens=self.UNNEST_OFFSET_ALIAS_TOKENS 2743 ) or exp.to_identifier("offset") 2744 2745 return self.expression(exp.Unnest, expressions=expressions, alias=alias, offset=offset) 2746 2747 def _parse_derived_table_values(self) -> t.Optional[exp.Values]: 2748 is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES) 2749 if not 
is_derived and not self._match(TokenType.VALUES): 2750 return None 2751 2752 expressions = self._parse_csv(self._parse_value) 2753 alias = self._parse_table_alias() 2754 2755 if is_derived: 2756 self._match_r_paren() 2757 2758 return self.expression( 2759 exp.Values, expressions=expressions, alias=alias or self._parse_table_alias() 2760 ) 2761 2762 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 2763 if not self._match(TokenType.TABLE_SAMPLE) and not ( 2764 as_modifier and self._match_text_seq("USING", "SAMPLE") 2765 ): 2766 return None 2767 2768 bucket_numerator = None 2769 bucket_denominator = None 2770 bucket_field = None 2771 percent = None 2772 rows = None 2773 size = None 2774 seed = None 2775 2776 kind = ( 2777 self._prev.text if self._prev.token_type == TokenType.TABLE_SAMPLE else "USING SAMPLE" 2778 ) 2779 method = self._parse_var(tokens=(TokenType.ROW,)) 2780 2781 matched_l_paren = self._match(TokenType.L_PAREN) 2782 2783 if self.TABLESAMPLE_CSV: 2784 num = None 2785 expressions = self._parse_csv(self._parse_primary) 2786 else: 2787 expressions = None 2788 num = ( 2789 self._parse_factor() 2790 if self._match(TokenType.NUMBER, advance=False) 2791 else self._parse_primary() 2792 ) 2793 2794 if self._match_text_seq("BUCKET"): 2795 bucket_numerator = self._parse_number() 2796 self._match_text_seq("OUT", "OF") 2797 bucket_denominator = self._parse_number() 2798 self._match(TokenType.ON) 2799 bucket_field = self._parse_field() 2800 elif self._match_set((TokenType.PERCENT, TokenType.MOD)): 2801 percent = num 2802 elif self._match(TokenType.ROWS): 2803 rows = num 2804 elif num: 2805 size = num 2806 2807 if matched_l_paren: 2808 self._match_r_paren() 2809 2810 if self._match(TokenType.L_PAREN): 2811 method = self._parse_var() 2812 seed = self._match(TokenType.COMMA) and self._parse_number() 2813 self._match_r_paren() 2814 elif self._match_texts(("SEED", "REPEATABLE")): 2815 seed = self._parse_wrapped(self._parse_number) 2816 2817 return self.expression( 2818 exp.TableSample, 2819 expressions=expressions, 2820 method=method, 2821 bucket_numerator=bucket_numerator, 2822 bucket_denominator=bucket_denominator, 2823 bucket_field=bucket_field, 2824 percent=percent, 2825 rows=rows, 2826 size=size, 2827 seed=seed, 2828 kind=kind, 2829 ) 2830 2831 def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]: 2832 return list(iter(self._parse_pivot, None)) or None 2833 2834 def _parse_joins(self) -> t.Optional[t.List[exp.Join]]: 2835 return list(iter(self._parse_join, None)) or None 2836 2837 # https://duckdb.org/docs/sql/statements/pivot 2838 def _parse_simplified_pivot(self) -> exp.Pivot: 2839 def _parse_on() -> t.Optional[exp.Expression]: 2840 this = self._parse_bitwise() 2841 return self._parse_in(this) if self._match(TokenType.IN) else this 2842 2843 this = self._parse_table() 2844 expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on) 2845 using = self._match(TokenType.USING) and self._parse_csv( 2846 lambda: self._parse_alias(self._parse_function()) 2847 ) 2848 group = self._parse_group() 2849 return self.expression( 2850 exp.Pivot, this=this, expressions=expressions, using=using, group=group 2851 ) 2852 2853 def _parse_pivot(self) -> t.Optional[exp.Pivot]: 2854 index = self._index 2855 include_nulls = None 2856 2857 if self._match(TokenType.PIVOT): 2858 unpivot = False 2859 elif self._match(TokenType.UNPIVOT): 2860 unpivot = True 2861 2862 #
https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax 2863 if self._match_text_seq("INCLUDE", "NULLS"): 2864 include_nulls = True 2865 elif self._match_text_seq("EXCLUDE", "NULLS"): 2866 include_nulls = False 2867 else: 2868 return None 2869 2870 expressions = [] 2871 field = None 2872 2873 if not self._match(TokenType.L_PAREN): 2874 self._retreat(index) 2875 return None 2876 2877 if unpivot: 2878 expressions = self._parse_csv(self._parse_column) 2879 else: 2880 expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function())) 2881 2882 if not expressions: 2883 self.raise_error("Failed to parse PIVOT's aggregation list") 2884 2885 if not self._match(TokenType.FOR): 2886 self.raise_error("Expecting FOR") 2887 2888 value = self._parse_column() 2889 2890 if not self._match(TokenType.IN): 2891 self.raise_error("Expecting IN") 2892 2893 field = self._parse_in(value, alias=True) 2894 2895 self._match_r_paren() 2896 2897 pivot = self.expression( 2898 exp.Pivot, 2899 expressions=expressions, 2900 field=field, 2901 unpivot=unpivot, 2902 include_nulls=include_nulls, 2903 ) 2904 2905 if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False): 2906 pivot.set("alias", self._parse_table_alias()) 2907 2908 if not unpivot: 2909 names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions)) 2910 2911 columns: t.List[exp.Expression] = [] 2912 for fld in pivot.args["field"].expressions: 2913 field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name 2914 for name in names: 2915 if self.PREFIXED_PIVOT_COLUMNS: 2916 name = f"{name}_{field_name}" if name else field_name 2917 else: 2918 name = f"{field_name}_{name}" if name else field_name 2919 2920 columns.append(exp.to_identifier(name)) 2921 2922 pivot.set("columns", columns) 2923 2924 return pivot 2925 2926 def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]: 2927 return [agg.alias for agg in aggregations] 2928 2929 def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]: 2930 if not skip_where_token and not self._match(TokenType.WHERE): 2931 return None 2932 2933 return self.expression( 2934 exp.Where, comments=self._prev_comments, this=self._parse_conjunction() 2935 ) 2936 2937 def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]: 2938 if not skip_group_by_token and not self._match(TokenType.GROUP_BY): 2939 return None 2940 2941 elements = defaultdict(list) 2942 2943 if self._match(TokenType.ALL): 2944 return self.expression(exp.Group, all=True) 2945 2946 while True: 2947 expressions = self._parse_csv(self._parse_conjunction) 2948 if expressions: 2949 elements["expressions"].extend(expressions) 2950 2951 grouping_sets = self._parse_grouping_sets() 2952 if grouping_sets: 2953 elements["grouping_sets"].extend(grouping_sets) 2954 2955 rollup = None 2956 cube = None 2957 totals = None 2958 2959 index = self._index 2960 with_ = self._match(TokenType.WITH) 2961 if self._match(TokenType.ROLLUP): 2962 rollup = with_ or self._parse_wrapped_csv(self._parse_column) 2963 elements["rollup"].extend(ensure_list(rollup)) 2964 2965 if self._match(TokenType.CUBE): 2966 cube = with_ or self._parse_wrapped_csv(self._parse_column) 2967 elements["cube"].extend(ensure_list(cube)) 2968 2969 if self._match_text_seq("TOTALS"): 2970 totals = True 2971 elements["totals"] = True # type: ignore 2972 2973 if not (grouping_sets or rollup or cube or totals): 2974 if with_: 2975 self._retreat(index) 
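# [Editor's note] Reaching this point means no further grouping construct matched on
# this pass, so the dangling WITH (if any) is undone and the loop exits below. The
# loop is what lets a single GROUP BY mix plain expressions with GROUPING SETS,
# ROLLUP and CUBE. A hedged round-trip sketch:
#
#     >>> import sqlglot
#     >>> sqlglot.parse_one("SELECT a, SUM(b) FROM t GROUP BY ROLLUP (a)").sql()
#     'SELECT a, SUM(b) FROM t GROUP BY ROLLUP (a)'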
2976 break 2977 2978 return self.expression(exp.Group, **elements) # type: ignore 2979 2980 def _parse_grouping_sets(self) -> t.Optional[t.List[exp.Expression]]: 2981 if not self._match(TokenType.GROUPING_SETS): 2982 return None 2983 2984 return self._parse_wrapped_csv(self._parse_grouping_set) 2985 2986 def _parse_grouping_set(self) -> t.Optional[exp.Expression]: 2987 if self._match(TokenType.L_PAREN): 2988 grouping_set = self._parse_csv(self._parse_column) 2989 self._match_r_paren() 2990 return self.expression(exp.Tuple, expressions=grouping_set) 2991 2992 return self._parse_column() 2993 2994 def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]: 2995 if not skip_having_token and not self._match(TokenType.HAVING): 2996 return None 2997 return self.expression(exp.Having, this=self._parse_conjunction()) 2998 2999 def _parse_qualify(self) -> t.Optional[exp.Qualify]: 3000 if not self._match(TokenType.QUALIFY): 3001 return None 3002 return self.expression(exp.Qualify, this=self._parse_conjunction()) 3003 3004 def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]: 3005 if skip_start_token: 3006 start = None 3007 elif self._match(TokenType.START_WITH): 3008 start = self._parse_conjunction() 3009 else: 3010 return None 3011 3012 self._match(TokenType.CONNECT_BY) 3013 self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression( 3014 exp.Prior, this=self._parse_bitwise() 3015 ) 3016 connect = self._parse_conjunction() 3017 self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR") 3018 3019 if not start and self._match(TokenType.START_WITH): 3020 start = self._parse_conjunction() 3021 3022 return self.expression(exp.Connect, start=start, connect=connect) 3023 3024 def _parse_order( 3025 self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False 3026 ) -> t.Optional[exp.Expression]: 3027 if not skip_order_token and not self._match(TokenType.ORDER_BY): 3028 return this 3029 3030 return self.expression( 3031 exp.Order, this=this, expressions=self._parse_csv(self._parse_ordered) 3032 ) 3033 3034 def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]: 3035 if not self._match(token): 3036 return None 3037 return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered)) 3038 3039 def _parse_ordered(self, parse_method: t.Optional[t.Callable] = None) -> exp.Ordered: 3040 this = parse_method() if parse_method else self._parse_conjunction() 3041 3042 asc = self._match(TokenType.ASC) 3043 desc = self._match(TokenType.DESC) or (asc and False) 3044 3045 is_nulls_first = self._match_text_seq("NULLS", "FIRST") 3046 is_nulls_last = self._match_text_seq("NULLS", "LAST") 3047 3048 nulls_first = is_nulls_first or False 3049 explicitly_null_ordered = is_nulls_first or is_nulls_last 3050 3051 if ( 3052 not explicitly_null_ordered 3053 and ( 3054 (not desc and self.NULL_ORDERING == "nulls_are_small") 3055 or (desc and self.NULL_ORDERING != "nulls_are_small") 3056 ) 3057 and self.NULL_ORDERING != "nulls_are_last" 3058 ): 3059 nulls_first = True 3060 3061 return self.expression(exp.Ordered, this=this, desc=desc, nulls_first=nulls_first) 3062 3063 def _parse_limit( 3064 self, this: t.Optional[exp.Expression] = None, top: bool = False 3065 ) -> t.Optional[exp.Expression]: 3066 if self._match(TokenType.TOP if top else TokenType.LIMIT): 3067 comments = self._prev_comments 3068 if top: 3069 limit_paren = self._match(TokenType.L_PAREN) 3070 expression = self._parse_number() 3071 3072 if limit_paren: 3073 
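# [Editor's note] T-SQL's TOP wraps its count in parentheses (SELECT TOP (5) ...), so
# the opening paren matched above has to be balanced here; an ordinary LIMIT takes
# the else branch and parses a full term instead. A hedged sketch:
#
#     >>> import sqlglot
#     >>> ast = sqlglot.parse_one("SELECT TOP (5) * FROM t", read="tsql")
#     >>> ast.args["limit"].expression.sql()
#     '5'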
self._match_r_paren() 3074 else: 3075 expression = self._parse_term() 3076 3077 if self._match(TokenType.COMMA): 3078 offset = expression 3079 expression = self._parse_term() 3080 else: 3081 offset = None 3082 3083 limit_exp = self.expression( 3084 exp.Limit, this=this, expression=expression, offset=offset, comments=comments 3085 ) 3086 3087 return limit_exp 3088 3089 if self._match(TokenType.FETCH): 3090 direction = self._match_set((TokenType.FIRST, TokenType.NEXT)) 3091 direction = self._prev.text if direction else "FIRST" 3092 3093 count = self._parse_field(tokens=self.FETCH_TOKENS) 3094 percent = self._match(TokenType.PERCENT) 3095 3096 self._match_set((TokenType.ROW, TokenType.ROWS)) 3097 3098 only = self._match_text_seq("ONLY") 3099 with_ties = self._match_text_seq("WITH", "TIES") 3100 3101 if only and with_ties: 3102 self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause") 3103 3104 return self.expression( 3105 exp.Fetch, 3106 direction=direction, 3107 count=count, 3108 percent=percent, 3109 with_ties=with_ties, 3110 ) 3111 3112 return this 3113 3114 def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 3115 if not self._match(TokenType.OFFSET): 3116 return this 3117 3118 count = self._parse_term() 3119 self._match_set((TokenType.ROW, TokenType.ROWS)) 3120 return self.expression(exp.Offset, this=this, expression=count) 3121 3122 def _parse_locks(self) -> t.List[exp.Lock]: 3123 locks = [] 3124 while True: 3125 if self._match_text_seq("FOR", "UPDATE"): 3126 update = True 3127 elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq( 3128 "LOCK", "IN", "SHARE", "MODE" 3129 ): 3130 update = False 3131 else: 3132 break 3133 3134 expressions = None 3135 if self._match_text_seq("OF"): 3136 expressions = self._parse_csv(lambda: self._parse_table(schema=True)) 3137 3138 wait: t.Optional[bool | exp.Expression] = None 3139 if self._match_text_seq("NOWAIT"): 3140 wait = True 3141 elif self._match_text_seq("WAIT"): 3142 wait = self._parse_primary() 3143 elif self._match_text_seq("SKIP", "LOCKED"): 3144 wait = False 3145 3146 locks.append( 3147 self.expression(exp.Lock, update=update, expressions=expressions, wait=wait) 3148 ) 3149 3150 return locks 3151 3152 def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3153 if not self._match_set(self.SET_OPERATIONS): 3154 return this 3155 3156 token_type = self._prev.token_type 3157 3158 if token_type == TokenType.UNION: 3159 expression = exp.Union 3160 elif token_type == TokenType.EXCEPT: 3161 expression = exp.Except 3162 else: 3163 expression = exp.Intersect 3164 3165 return self.expression( 3166 expression, 3167 comments=self._prev.comments, 3168 this=this, 3169 distinct=self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL), 3170 by_name=self._match_text_seq("BY", "NAME"), 3171 expression=self._parse_set_operations(self._parse_select(nested=True)), 3172 ) 3173 3174 def _parse_expression(self) -> t.Optional[exp.Expression]: 3175 return self._parse_alias(self._parse_conjunction()) 3176 3177 def _parse_conjunction(self) -> t.Optional[exp.Expression]: 3178 return self._parse_tokens(self._parse_equality, self.CONJUNCTION) 3179 3180 def _parse_equality(self) -> t.Optional[exp.Expression]: 3181 return self._parse_tokens(self._parse_comparison, self.EQUALITY) 3182 3183 def _parse_comparison(self) -> t.Optional[exp.Expression]: 3184 return self._parse_tokens(self._parse_range, self.COMPARISON) 3185 3186 def _parse_range(self) -> 
t.Optional[exp.Expression]: 3187 this = self._parse_bitwise() 3188 negate = self._match(TokenType.NOT) 3189 3190 if self._match_set(self.RANGE_PARSERS): 3191 expression = self.RANGE_PARSERS[self._prev.token_type](self, this) 3192 if not expression: 3193 return this 3194 3195 this = expression 3196 elif self._match(TokenType.ISNULL): 3197 this = self.expression(exp.Is, this=this, expression=exp.Null()) 3198 3199 # Postgres supports ISNULL and NOTNULL for conditions. 3200 # https://blog.andreiavram.ro/postgresql-null-composite-type/ 3201 if self._match(TokenType.NOTNULL): 3202 this = self.expression(exp.Is, this=this, expression=exp.Null()) 3203 this = self.expression(exp.Not, this=this) 3204 3205 if negate: 3206 this = self.expression(exp.Not, this=this) 3207 3208 if self._match(TokenType.IS): 3209 this = self._parse_is(this) 3210 3211 return this 3212 3213 def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3214 index = self._index - 1 3215 negate = self._match(TokenType.NOT) 3216 3217 if self._match_text_seq("DISTINCT", "FROM"): 3218 klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ 3219 return self.expression(klass, this=this, expression=self._parse_conjunction()) 3220 3221 expression = self._parse_null() or self._parse_boolean() 3222 if not expression: 3223 self._retreat(index) 3224 return None 3225 3226 this = self.expression(exp.Is, this=this, expression=expression) 3227 return self.expression(exp.Not, this=this) if negate else this 3228 3229 def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In: 3230 unnest = self._parse_unnest(with_alias=False) 3231 if unnest: 3232 this = self.expression(exp.In, this=this, unnest=unnest) 3233 elif self._match(TokenType.L_PAREN): 3234 expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias)) 3235 3236 if len(expressions) == 1 and isinstance(expressions[0], exp.Subqueryable): 3237 this = self.expression(exp.In, this=this, query=expressions[0]) 3238 else: 3239 this = self.expression(exp.In, this=this, expressions=expressions) 3240 3241 self._match_r_paren(this) 3242 else: 3243 this = self.expression(exp.In, this=this, field=self._parse_field()) 3244 3245 return this 3246 3247 def _parse_between(self, this: exp.Expression) -> exp.Between: 3248 low = self._parse_bitwise() 3249 self._match(TokenType.AND) 3250 high = self._parse_bitwise() 3251 return self.expression(exp.Between, this=this, low=low, high=high) 3252 3253 def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3254 if not self._match(TokenType.ESCAPE): 3255 return this 3256 return self.expression(exp.Escape, this=this, expression=self._parse_string()) 3257 3258 def _parse_interval(self) -> t.Optional[exp.Interval]: 3259 index = self._index 3260 3261 if not self._match(TokenType.INTERVAL): 3262 return None 3263 3264 if self._match(TokenType.STRING, advance=False): 3265 this = self._parse_primary() 3266 else: 3267 this = self._parse_term() 3268 3269 if not this: 3270 self._retreat(index) 3271 return None 3272 3273 unit = self._parse_function() or self._parse_var(any_token=True) 3274 3275 # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse 3276 # each INTERVAL expression into this canonical form so it's easy to transpile 3277 if this and this.is_number: 3278 this = exp.Literal.string(this.name) 3279 elif this and this.is_string: 3280 parts = this.name.split() 3281 3282 if len(parts) == 2: 3283 if unit: 3284 # This is not actually a unit, it's 
something else (e.g. a "window side") 3285 unit = None 3286 self._retreat(self._index - 1) 3287 3288 this = exp.Literal.string(parts[0]) 3289 unit = self.expression(exp.Var, this=parts[1]) 3290 3291 return self.expression(exp.Interval, this=this, unit=unit) 3292 3293 def _parse_bitwise(self) -> t.Optional[exp.Expression]: 3294 this = self._parse_term() 3295 3296 while True: 3297 if self._match_set(self.BITWISE): 3298 this = self.expression( 3299 self.BITWISE[self._prev.token_type], 3300 this=this, 3301 expression=self._parse_term(), 3302 ) 3303 elif self._match(TokenType.DQMARK): 3304 this = self.expression(exp.Coalesce, this=this, expressions=self._parse_term()) 3305 elif self._match_pair(TokenType.LT, TokenType.LT): 3306 this = self.expression( 3307 exp.BitwiseLeftShift, this=this, expression=self._parse_term() 3308 ) 3309 elif self._match_pair(TokenType.GT, TokenType.GT): 3310 this = self.expression( 3311 exp.BitwiseRightShift, this=this, expression=self._parse_term() 3312 ) 3313 else: 3314 break 3315 3316 return this 3317 3318 def _parse_term(self) -> t.Optional[exp.Expression]: 3319 return self._parse_tokens(self._parse_factor, self.TERM) 3320 3321 def _parse_factor(self) -> t.Optional[exp.Expression]: 3322 return self._parse_tokens(self._parse_unary, self.FACTOR) 3323 3324 def _parse_unary(self) -> t.Optional[exp.Expression]: 3325 if self._match_set(self.UNARY_PARSERS): 3326 return self.UNARY_PARSERS[self._prev.token_type](self) 3327 return self._parse_at_time_zone(self._parse_type()) 3328 3329 def _parse_type(self, parse_interval: bool = True) -> t.Optional[exp.Expression]: 3330 interval = parse_interval and self._parse_interval() 3331 if interval: 3332 return interval 3333 3334 index = self._index 3335 data_type = self._parse_types(check_func=True, allow_identifiers=False) 3336 this = self._parse_column() 3337 3338 if data_type: 3339 if isinstance(this, exp.Literal): 3340 parser = self.TYPE_LITERAL_PARSERS.get(data_type.this) 3341 if parser: 3342 return parser(self, this, data_type) 3343 return self.expression(exp.Cast, this=this, to=data_type) 3344 if not data_type.expressions: 3345 self._retreat(index) 3346 return self._parse_column() 3347 return self._parse_column_ops(data_type) 3348 3349 return this and self._parse_column_ops(this) 3350 3351 def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]: 3352 this = self._parse_type() 3353 if not this: 3354 return None 3355 3356 return self.expression( 3357 exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True) 3358 ) 3359 3360 def _parse_types( 3361 self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True 3362 ) -> t.Optional[exp.Expression]: 3363 index = self._index 3364 3365 prefix = self._match_text_seq("SYSUDTLIB", ".") 3366 3367 if not self._match_set(self.TYPE_TOKENS): 3368 identifier = allow_identifiers and self._parse_id_var( 3369 any_token=False, tokens=(TokenType.VAR,) 3370 ) 3371 3372 if identifier: 3373 tokens = self._tokenizer.tokenize(identifier.name) 3374 3375 if len(tokens) != 1: 3376 self.raise_error("Unexpected identifier", self._prev) 3377 3378 if tokens[0].token_type in self.TYPE_TOKENS: 3379 self._prev = tokens[0] 3380 elif self.SUPPORTS_USER_DEFINED_TYPES: 3381 type_name = identifier.name 3382 3383 while self._match(TokenType.DOT): 3384 type_name = f"{type_name}.{self._advance_any() and self._prev.text}" 3385 3386 return exp.DataType.build(type_name, udt=True) 3387 else: 3388 return None 3389 else: 3390 return None 3391 3392 type_token = self._prev.token_type 
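# [Editor's note] From here the matched type token is specialized: pseudo-types and
# object identifiers return immediately, and the code below handles nested types
# (ARRAY/MAP/STRUCT), parenthesized parameters, and timestamp/interval quirks. A
# hedged sketch of parameterized-type parsing via the public API:
#
#     >>> import sqlglot
#     >>> from sqlglot import exp
#     >>> cast = sqlglot.parse_one("CAST(x AS DECIMAL(10, 2))")
#     >>> cast.to.this == exp.DataType.Type.DECIMAL, len(cast.to.expressions)
#     (True, 2)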
3393 3394 if type_token == TokenType.PSEUDO_TYPE: 3395 return self.expression(exp.PseudoType, this=self._prev.text) 3396 3397 if type_token == TokenType.OBJECT_IDENTIFIER: 3398 return self.expression(exp.ObjectIdentifier, this=self._prev.text) 3399 3400 nested = type_token in self.NESTED_TYPE_TOKENS 3401 is_struct = type_token in self.STRUCT_TYPE_TOKENS 3402 expressions = None 3403 maybe_func = False 3404 3405 if self._match(TokenType.L_PAREN): 3406 if is_struct: 3407 expressions = self._parse_csv(self._parse_struct_types) 3408 elif nested: 3409 expressions = self._parse_csv( 3410 lambda: self._parse_types( 3411 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 3412 ) 3413 ) 3414 elif type_token in self.ENUM_TYPE_TOKENS: 3415 expressions = self._parse_csv(self._parse_equality) 3416 else: 3417 expressions = self._parse_csv(self._parse_type_size) 3418 3419 if not expressions or not self._match(TokenType.R_PAREN): 3420 self._retreat(index) 3421 return None 3422 3423 maybe_func = True 3424 3425 this: t.Optional[exp.Expression] = None 3426 values: t.Optional[t.List[exp.Expression]] = None 3427 3428 if nested and self._match(TokenType.LT): 3429 if is_struct: 3430 expressions = self._parse_csv(self._parse_struct_types) 3431 else: 3432 expressions = self._parse_csv( 3433 lambda: self._parse_types( 3434 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 3435 ) 3436 ) 3437 3438 if not self._match(TokenType.GT): 3439 self.raise_error("Expecting >") 3440 3441 if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)): 3442 values = self._parse_csv(self._parse_conjunction) 3443 self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN)) 3444 3445 if type_token in self.TIMESTAMPS: 3446 if self._match_text_seq("WITH", "TIME", "ZONE"): 3447 maybe_func = False 3448 tz_type = ( 3449 exp.DataType.Type.TIMETZ 3450 if type_token in self.TIMES 3451 else exp.DataType.Type.TIMESTAMPTZ 3452 ) 3453 this = exp.DataType(this=tz_type, expressions=expressions) 3454 elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"): 3455 maybe_func = False 3456 this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions) 3457 elif self._match_text_seq("WITHOUT", "TIME", "ZONE"): 3458 maybe_func = False 3459 elif type_token == TokenType.INTERVAL: 3460 unit = self._parse_var() 3461 3462 if self._match_text_seq("TO"): 3463 span = [exp.IntervalSpan(this=unit, expression=self._parse_var())] 3464 else: 3465 span = None 3466 3467 if span or not unit: 3468 this = self.expression( 3469 exp.DataType, this=exp.DataType.Type.INTERVAL, expressions=span 3470 ) 3471 else: 3472 this = self.expression(exp.Interval, unit=unit) 3473 3474 if maybe_func and check_func: 3475 index2 = self._index 3476 peek = self._parse_string() 3477 3478 if not peek: 3479 self._retreat(index) 3480 return None 3481 3482 self._retreat(index2) 3483 3484 if not this: 3485 if self._match_text_seq("UNSIGNED"): 3486 unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token) 3487 if not unsigned_type_token: 3488 self.raise_error(f"Cannot convert {type_token.value} to unsigned.") 3489 3490 type_token = unsigned_type_token or type_token 3491 3492 this = exp.DataType( 3493 this=exp.DataType.Type[type_token.value], 3494 expressions=expressions, 3495 nested=nested, 3496 values=values, 3497 prefix=prefix, 3498 ) 3499 3500 while self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET): 3501 this = exp.DataType(this=exp.DataType.Type.ARRAY, expressions=[this], nested=True) 3502 3503 return this 3504 
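# [Editor's note] _parse_struct_types (below) parses each `name type` pair inside a
# STRUCT<...>, delegating to _parse_column_def so nested fields behave like column
# definitions. A hedged sketch via the public type builder:
#
#     >>> from sqlglot import exp
#     >>> dt = exp.DataType.build("STRUCT<a INT, b TEXT>", dialect="bigquery")
#     >>> dt.is_type(exp.DataType.Type.STRUCT), len(dt.expressions)
#     (True, 2)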
3505 def _parse_struct_types(self) -> t.Optional[exp.Expression]: 3506 this = self._parse_type(parse_interval=False) or self._parse_id_var() 3507 self._match(TokenType.COLON) 3508 return self._parse_column_def(this) 3509 3510 def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3511 if not self._match_text_seq("AT", "TIME", "ZONE"): 3512 return this 3513 return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary()) 3514 3515 def _parse_column(self) -> t.Optional[exp.Expression]: 3516 this = self._parse_field() 3517 if isinstance(this, exp.Identifier): 3518 this = self.expression(exp.Column, this=this) 3519 elif not this: 3520 return self._parse_bracket(this) 3521 return self._parse_column_ops(this) 3522 3523 def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3524 this = self._parse_bracket(this) 3525 3526 while self._match_set(self.COLUMN_OPERATORS): 3527 op_token = self._prev.token_type 3528 op = self.COLUMN_OPERATORS.get(op_token) 3529 3530 if op_token == TokenType.DCOLON: 3531 field = self._parse_types() 3532 if not field: 3533 self.raise_error("Expected type") 3534 elif op and self._curr: 3535 self._advance() 3536 value = self._prev.text 3537 field = ( 3538 exp.Literal.number(value) 3539 if self._prev.token_type == TokenType.NUMBER 3540 else exp.Literal.string(value) 3541 ) 3542 else: 3543 field = self._parse_field(anonymous_func=True, any_token=True) 3544 3545 if isinstance(field, exp.Func): 3546 # bigquery allows function calls like x.y.count(...) 3547 # SAFE.SUBSTR(...) 3548 # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules 3549 this = self._replace_columns_with_dots(this) 3550 3551 if op: 3552 this = op(self, this, field) 3553 elif isinstance(this, exp.Column) and not this.args.get("catalog"): 3554 this = self.expression( 3555 exp.Column, 3556 this=field, 3557 table=this.this, 3558 db=this.args.get("table"), 3559 catalog=this.args.get("db"), 3560 ) 3561 else: 3562 this = self.expression(exp.Dot, this=this, expression=field) 3563 this = self._parse_bracket(this) 3564 return this 3565 3566 def _parse_primary(self) -> t.Optional[exp.Expression]: 3567 if self._match_set(self.PRIMARY_PARSERS): 3568 token_type = self._prev.token_type 3569 primary = self.PRIMARY_PARSERS[token_type](self, self._prev) 3570 3571 if token_type == TokenType.STRING: 3572 expressions = [primary] 3573 while self._match(TokenType.STRING): 3574 expressions.append(exp.Literal.string(self._prev.text)) 3575 3576 if len(expressions) > 1: 3577 return self.expression(exp.Concat, expressions=expressions) 3578 3579 return primary 3580 3581 if self._match_pair(TokenType.DOT, TokenType.NUMBER): 3582 return exp.Literal.number(f"0.{self._prev.text}") 3583 3584 if self._match(TokenType.L_PAREN): 3585 comments = self._prev_comments 3586 query = self._parse_select() 3587 3588 if query: 3589 expressions = [query] 3590 else: 3591 expressions = self._parse_expressions() 3592 3593 this = self._parse_query_modifiers(seq_get(expressions, 0)) 3594 3595 if isinstance(this, exp.Subqueryable): 3596 this = self._parse_set_operations( 3597 self._parse_subquery(this=this, parse_alias=False) 3598 ) 3599 elif len(expressions) > 1: 3600 this = self.expression(exp.Tuple, expressions=expressions) 3601 else: 3602 this = self.expression(exp.Paren, this=self._parse_set_operations(this)) 3603 3604 if this: 3605 this.add_comments(comments) 3606 3607 self._match_r_paren(expression=this) 3608 return this 
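# [Editor's note] The parenthesized branch above distinguishes three shapes: a query
# in parens becomes an exp.Subquery, several comma-separated expressions become an
# exp.Tuple, and a single expression stays wrapped in exp.Paren. A hedged sketch:
#
#     >>> import sqlglot
#     >>> type(sqlglot.parse_one("SELECT (1, 2)").selects[0]).__name__
#     'Tuple'
#     >>> type(sqlglot.parse_one("SELECT (SELECT 1)").selects[0]).__name__
#     'Subquery'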
    def _parse_field(
        self,
        any_token: bool = False,
        tokens: t.Optional[t.Collection[TokenType]] = None,
        anonymous_func: bool = False,
    ) -> t.Optional[exp.Expression]:
        return (
            self._parse_primary()
            or self._parse_function(anonymous=anonymous_func)
            or self._parse_id_var(any_token=any_token, tokens=tokens)
        )

    def _parse_function(
        self,
        functions: t.Optional[t.Dict[str, t.Callable]] = None,
        anonymous: bool = False,
        optional_parens: bool = True,
    ) -> t.Optional[exp.Expression]:
        # This allows us to also parse {fn <function>} syntax (Snowflake, MySQL support this)
        # See: https://community.snowflake.com/s/article/SQL-Escape-Sequences
        fn_syntax = False
        if (
            self._match(TokenType.L_BRACE, advance=False)
            and self._next
            and self._next.text.upper() == "FN"
        ):
            self._advance(2)
            fn_syntax = True

        func = self._parse_function_call(
            functions=functions, anonymous=anonymous, optional_parens=optional_parens
        )

        if fn_syntax:
            self._match(TokenType.R_BRACE)

        return func

    def _parse_function_call(
        self,
        functions: t.Optional[t.Dict[str, t.Callable]] = None,
        anonymous: bool = False,
        optional_parens: bool = True,
    ) -> t.Optional[exp.Expression]:
        if not self._curr:
            return None

        token_type = self._curr.token_type
        this = self._curr.text
        upper = this.upper()

        parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper)
        if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS:
            self._advance()
            return parser(self)

        if not self._next or self._next.token_type != TokenType.L_PAREN:
            if optional_parens and token_type in self.NO_PAREN_FUNCTIONS:
                self._advance()
                return self.expression(self.NO_PAREN_FUNCTIONS[token_type])

            return None

        if token_type not in self.FUNC_TOKENS:
            return None

        self._advance(2)

        parser = self.FUNCTION_PARSERS.get(upper)
        if parser and not anonymous:
            this = parser(self)
        else:
            subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type)

            if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH):
                this = self.expression(subquery_predicate, this=self._parse_select())
                self._match_r_paren()
                return this

            if functions is None:
                functions = self.FUNCTIONS

            function = functions.get(upper)

            alias = upper in self.FUNCTIONS_WITH_ALIASED_ARGS
            args = self._parse_csv(lambda: self._parse_lambda(alias=alias))

            if function and not anonymous:
                func = self.validate_expression(function(args), args)
                if not self.NORMALIZE_FUNCTIONS:
                    func.meta["name"] = this
                this = func
            else:
                this = self.expression(exp.Anonymous, this=this, expressions=args)

        self._match_r_paren(this)
        return self._parse_window(this)

    def _parse_function_parameter(self) -> t.Optional[exp.Expression]:
        return self._parse_column_def(self._parse_id_var())

    def _parse_user_defined_function(
        self, kind: t.Optional[TokenType] = None
    ) -> t.Optional[exp.Expression]:
        this = self._parse_id_var()

        while self._match(TokenType.DOT):
            this = self.expression(exp.Dot, this=this, expression=self._parse_id_var())

        if not self._match(TokenType.L_PAREN):
            return this

        expressions = self._parse_csv(self._parse_function_parameter)
        self._match_r_paren()
        return self.expression(
            exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True
        )
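    # A small sketch of the {fn ...} escape handling in _parse_function above
    # (assuming the default dialect; output indicative): the brace wrapper is
    # consumed and dropped, so only the inner function call survives in the AST.
    #
    #     >>> import sqlglot
    #     >>> print(sqlglot.parse_one("SELECT {fn CONCAT('a', 'b')}").sql())
    #     SELECT CONCAT('a', 'b')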
    def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier:
        literal = self._parse_primary()
        if literal:
            return self.expression(exp.Introducer, this=token.text, expression=literal)

        return self.expression(exp.Identifier, this=token.text)

    def _parse_session_parameter(self) -> exp.SessionParameter:
        kind = None
        this = self._parse_id_var() or self._parse_primary()

        if this and self._match(TokenType.DOT):
            kind = this.name
            this = self._parse_var() or self._parse_primary()

        return self.expression(exp.SessionParameter, this=this, kind=kind)

    def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]:
        index = self._index

        if self._match(TokenType.L_PAREN):
            expressions = t.cast(
                t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_id_var)
            )

            if not self._match(TokenType.R_PAREN):
                self._retreat(index)
        else:
            expressions = [self._parse_id_var()]

        if self._match_set(self.LAMBDAS):
            return self.LAMBDAS[self._prev.token_type](self, expressions)

        self._retreat(index)

        this: t.Optional[exp.Expression]

        if self._match(TokenType.DISTINCT):
            this = self.expression(
                exp.Distinct, expressions=self._parse_csv(self._parse_conjunction)
            )
        else:
            this = self._parse_select_or_expression(alias=alias)

        return self._parse_limit(self._parse_order(self._parse_respect_or_ignore_nulls(this)))

    def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        index = self._index

        if not self.errors:
            try:
                if self._parse_select(nested=True):
                    return this
            except ParseError:
                pass
            finally:
                self.errors.clear()
                self._retreat(index)

        if not self._match(TokenType.L_PAREN):
            return this

        args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def())

        self._match_r_paren()
        return self.expression(exp.Schema, this=this, expressions=args)

    def _parse_field_def(self) -> t.Optional[exp.Expression]:
        return self._parse_column_def(self._parse_field(any_token=True))

    def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        # column defs are not really columns, they're identifiers
        if isinstance(this, exp.Column):
            this = this.this

        kind = self._parse_types(schema=True)

        if self._match_text_seq("FOR", "ORDINALITY"):
            return self.expression(exp.ColumnDef, this=this, ordinality=True)

        constraints: t.List[exp.Expression] = []

        if not kind and self._match(TokenType.ALIAS):
            constraints.append(
                self.expression(
                    exp.ComputedColumnConstraint,
                    this=self._parse_conjunction(),
                    persisted=self._match_text_seq("PERSISTED"),
                    not_null=self._match_pair(TokenType.NOT, TokenType.NULL),
                )
            )

        while True:
            constraint = self._parse_column_constraint()
            if not constraint:
                break
            constraints.append(constraint)

        if not kind and not constraints:
            return this

        return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints)
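    # A usage sketch for the column-definition path above (default dialect):
    # _parse_schema feeds each parenthesized item through _parse_field_def, which
    # collects the type and any trailing constraints into an exp.ColumnDef.
    #
    #     >>> import sqlglot
    #     >>> print(sqlglot.parse_one("CREATE TABLE t (x INT NOT NULL)").sql())
    #     CREATE TABLE t (x INT NOT NULL)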
    def _parse_auto_increment(
        self,
    ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint:
        start = None
        increment = None

        if self._match(TokenType.L_PAREN, advance=False):
            args = self._parse_wrapped_csv(self._parse_bitwise)
            start = seq_get(args, 0)
            increment = seq_get(args, 1)
        elif self._match_text_seq("START"):
            start = self._parse_bitwise()
            self._match_text_seq("INCREMENT")
            increment = self._parse_bitwise()

        if start and increment:
            return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment)

        return exp.AutoIncrementColumnConstraint()

    def _parse_compress(self) -> exp.CompressColumnConstraint:
        if self._match(TokenType.L_PAREN, advance=False):
            return self.expression(
                exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise)
            )

        return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise())

    def _parse_generated_as_identity(
        self,
    ) -> exp.GeneratedAsIdentityColumnConstraint | exp.ComputedColumnConstraint:
        if self._match_text_seq("BY", "DEFAULT"):
            on_null = self._match_pair(TokenType.ON, TokenType.NULL)
            this = self.expression(
                exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null
            )
        else:
            self._match_text_seq("ALWAYS")
            this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True)

        self._match(TokenType.ALIAS)
        identity = self._match_text_seq("IDENTITY")

        if self._match(TokenType.L_PAREN):
            if self._match(TokenType.START_WITH):
                this.set("start", self._parse_bitwise())
            if self._match_text_seq("INCREMENT", "BY"):
                this.set("increment", self._parse_bitwise())
            if self._match_text_seq("MINVALUE"):
                this.set("minvalue", self._parse_bitwise())
            if self._match_text_seq("MAXVALUE"):
                this.set("maxvalue", self._parse_bitwise())

            if self._match_text_seq("CYCLE"):
                this.set("cycle", True)
            elif self._match_text_seq("NO", "CYCLE"):
                this.set("cycle", False)

            if not identity:
                this.set("expression", self._parse_bitwise())
            elif not this.args.get("start") and self._match(TokenType.NUMBER, advance=False):
                args = self._parse_csv(self._parse_bitwise)
                this.set("start", seq_get(args, 0))
                this.set("increment", seq_get(args, 1))

            self._match_r_paren()

        return this

    def _parse_inline(self) -> exp.InlineLengthColumnConstraint:
        self._match_text_seq("LENGTH")
        return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise())

    def _parse_not_constraint(
        self,
    ) -> t.Optional[exp.Expression]:
        if self._match_text_seq("NULL"):
            return self.expression(exp.NotNullColumnConstraint)
        if self._match_text_seq("CASESPECIFIC"):
            return self.expression(exp.CaseSpecificColumnConstraint, not_=True)
        if self._match_text_seq("FOR", "REPLICATION"):
            return self.expression(exp.NotForReplicationColumnConstraint)
        return None

    def _parse_column_constraint(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.CONSTRAINT):
            this = self._parse_id_var()
        else:
            this = None

        if self._match_texts(self.CONSTRAINT_PARSERS):
            return self.expression(
                exp.ColumnConstraint,
                this=this,
                kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self),
            )

        return this
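    # Sketch: inspecting the constraint produced by _parse_generated_as_identity
    # above (default dialect). The ALWAYS branch stores this=True on the node.
    #
    #     >>> import sqlglot
    #     >>> from sqlglot import exp
    #     >>> ddl = sqlglot.parse_one("CREATE TABLE t (id INT GENERATED ALWAYS AS IDENTITY)")
    #     >>> ddl.find(exp.GeneratedAsIdentityColumnConstraint).args["this"]
    #     True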
    def _parse_constraint(self) -> t.Optional[exp.Expression]:
        if not self._match(TokenType.CONSTRAINT):
            return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS)

        this = self._parse_id_var()
        expressions = []

        while True:
            constraint = self._parse_unnamed_constraint() or self._parse_function()
            if not constraint:
                break
            expressions.append(constraint)

        return self.expression(exp.Constraint, this=this, expressions=expressions)

    def _parse_unnamed_constraint(
        self, constraints: t.Optional[t.Collection[str]] = None
    ) -> t.Optional[exp.Expression]:
        if self._match(TokenType.IDENTIFIER, advance=False) or not self._match_texts(
            constraints or self.CONSTRAINT_PARSERS
        ):
            return None

        constraint = self._prev.text.upper()
        if constraint not in self.CONSTRAINT_PARSERS:
            self.raise_error(f"No parser found for schema constraint {constraint}.")

        return self.CONSTRAINT_PARSERS[constraint](self)

    def _parse_unique(self) -> exp.UniqueColumnConstraint:
        self._match_text_seq("KEY")
        return self.expression(
            exp.UniqueColumnConstraint,
            this=self._parse_schema(self._parse_id_var(any_token=False)),
            index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text,
        )

    def _parse_key_constraint_options(self) -> t.List[str]:
        options = []
        while True:
            if not self._curr:
                break

            if self._match(TokenType.ON):
                action = None
                on = self._advance_any() and self._prev.text

                if self._match_text_seq("NO", "ACTION"):
                    action = "NO ACTION"
                elif self._match_text_seq("CASCADE"):
                    action = "CASCADE"
                elif self._match_text_seq("RESTRICT"):
                    action = "RESTRICT"
                elif self._match_pair(TokenType.SET, TokenType.NULL):
                    action = "SET NULL"
                elif self._match_pair(TokenType.SET, TokenType.DEFAULT):
                    action = "SET DEFAULT"
                else:
                    self.raise_error("Invalid key constraint")

                options.append(f"ON {on} {action}")
            elif self._match_text_seq("NOT", "ENFORCED"):
                options.append("NOT ENFORCED")
            elif self._match_text_seq("DEFERRABLE"):
                options.append("DEFERRABLE")
            elif self._match_text_seq("INITIALLY", "DEFERRED"):
                options.append("INITIALLY DEFERRED")
            elif self._match_text_seq("NORELY"):
                options.append("NORELY")
            elif self._match_text_seq("MATCH", "FULL"):
                options.append("MATCH FULL")
            else:
                break

        return options

    def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]:
        if match and not self._match(TokenType.REFERENCES):
            return None

        expressions = None
        this = self._parse_table(schema=True)
        options = self._parse_key_constraint_options()
        return self.expression(exp.Reference, this=this, expressions=expressions, options=options)
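    # Sketch: _parse_key_constraint_options collects plain strings, so a
    # REFERENCES clause ends up with its actions as text options (default
    # dialect; the uppercase input is preserved verbatim in the option string).
    #
    #     >>> import sqlglot
    #     >>> from sqlglot import exp
    #     >>> ddl = sqlglot.parse_one("CREATE TABLE t (a INT REFERENCES s (b) ON DELETE CASCADE)")
    #     >>> ddl.find(exp.Reference).args["options"]
    #     ['ON DELETE CASCADE']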
    def _parse_foreign_key(self) -> exp.ForeignKey:
        expressions = self._parse_wrapped_id_vars()
        reference = self._parse_references()
        options = {}

        while self._match(TokenType.ON):
            if not self._match_set((TokenType.DELETE, TokenType.UPDATE)):
                self.raise_error("Expected DELETE or UPDATE")

            kind = self._prev.text.lower()

            if self._match_text_seq("NO", "ACTION"):
                action = "NO ACTION"
            elif self._match(TokenType.SET):
                self._match_set((TokenType.NULL, TokenType.DEFAULT))
                action = "SET " + self._prev.text.upper()
            else:
                self._advance()
                action = self._prev.text.upper()

            options[kind] = action

        return self.expression(
            exp.ForeignKey, expressions=expressions, reference=reference, **options  # type: ignore
        )

    def _parse_primary_key_part(self) -> t.Optional[exp.Expression]:
        return self._parse_field()

    def _parse_primary_key(
        self, wrapped_optional: bool = False, in_props: bool = False
    ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey:
        desc = (
            self._match_set((TokenType.ASC, TokenType.DESC))
            and self._prev.token_type == TokenType.DESC
        )

        if not in_props and not self._match(TokenType.L_PAREN, advance=False):
            return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc)

        expressions = self._parse_wrapped_csv(
            self._parse_primary_key_part, optional=wrapped_optional
        )
        options = self._parse_key_constraint_options()
        return self.expression(exp.PrimaryKey, expressions=expressions, options=options)

    def _parse_bracket(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)):
            return this

        bracket_kind = self._prev.token_type

        if self._match(TokenType.COLON):
            expressions: t.List[exp.Expression] = [
                self.expression(exp.Slice, expression=self._parse_conjunction())
            ]
        else:
            expressions = self._parse_csv(
                lambda: self._parse_slice(
                    self._parse_alias(self._parse_conjunction(), explicit=True)
                )
            )

        if not self._match(TokenType.R_BRACKET) and bracket_kind == TokenType.L_BRACKET:
            self.raise_error("Expected ]")
        elif not self._match(TokenType.R_BRACE) and bracket_kind == TokenType.L_BRACE:
            self.raise_error("Expected }")

        # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs
        if bracket_kind == TokenType.L_BRACE:
            this = self.expression(exp.Struct, expressions=expressions)
        elif not this or this.name.upper() == "ARRAY":
            this = self.expression(exp.Array, expressions=expressions)
        else:
            expressions = apply_index_offset(this, expressions, -self.INDEX_OFFSET)
            this = self.expression(exp.Bracket, this=this, expressions=expressions)

        self._add_comments(this)
        return self._parse_bracket(this)

    def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if self._match(TokenType.COLON):
            return self.expression(exp.Slice, this=this, expression=self._parse_conjunction())
        return this

    def _parse_case(self) -> t.Optional[exp.Expression]:
        ifs = []
        default = None

        comments = self._prev_comments
        expression = self._parse_conjunction()

        while self._match(TokenType.WHEN):
            this = self._parse_conjunction()
            self._match(TokenType.THEN)
            then = self._parse_conjunction()
            ifs.append(self.expression(exp.If, this=this, true=then))

        if self._match(TokenType.ELSE):
            default = self._parse_conjunction()

        if not self._match(TokenType.END):
            self.raise_error("Expected END after CASE", self._prev)

        return self._parse_window(
            self.expression(exp.Case, comments=comments, this=expression, ifs=ifs, default=default)
        )
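    # Sketch (default dialect): _parse_case builds one exp.If per WHEN arm plus an
    # optional default, and simple CASE expressions round-trip unchanged.
    #
    #     >>> import sqlglot
    #     >>> print(sqlglot.parse_one("SELECT CASE WHEN x = 1 THEN 'a' ELSE 'b' END").sql())
    #     SELECT CASE WHEN x = 1 THEN 'a' ELSE 'b' END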
    def _parse_if(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.L_PAREN):
            args = self._parse_csv(self._parse_conjunction)
            this = self.validate_expression(exp.If.from_arg_list(args), args)
            self._match_r_paren()
        else:
            index = self._index - 1
            condition = self._parse_conjunction()

            if not condition:
                self._retreat(index)
                return None

            self._match(TokenType.THEN)
            true = self._parse_conjunction()
            false = self._parse_conjunction() if self._match(TokenType.ELSE) else None
            self._match(TokenType.END)
            this = self.expression(exp.If, this=condition, true=true, false=false)

        return self._parse_window(this)

    def _parse_next_value_for(self) -> t.Optional[exp.Expression]:
        if not self._match_text_seq("VALUE", "FOR"):
            self._retreat(self._index - 1)
            return None

        return self.expression(
            exp.NextValueFor,
            this=self._parse_column(),
            order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order),
        )

    def _parse_extract(self) -> exp.Extract:
        this = self._parse_function() or self._parse_var() or self._parse_type()

        if self._match(TokenType.FROM):
            return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

        if not self._match(TokenType.COMMA):
            self.raise_error("Expected FROM or comma after EXTRACT", self._prev)

        return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

    def _parse_any_value(self) -> exp.AnyValue:
        this = self._parse_lambda()
        is_max = None
        having = None

        if self._match(TokenType.HAVING):
            self._match_texts(("MAX", "MIN"))
            is_max = self._prev.text == "MAX"
            having = self._parse_column()

        return self.expression(exp.AnyValue, this=this, having=having, max=is_max)

    def _parse_cast(self, strict: bool, safe: t.Optional[bool] = None) -> exp.Expression:
        this = self._parse_conjunction()

        if not self._match(TokenType.ALIAS):
            if self._match(TokenType.COMMA):
                return self.expression(exp.CastToStrType, this=this, to=self._parse_string())

            self.raise_error("Expected AS after CAST")

        fmt = None
        to = self._parse_types()

        if not to:
            self.raise_error("Expected TYPE after CAST")
        elif isinstance(to, exp.Identifier):
            to = exp.DataType.build(to.name, udt=True)
        elif to.this == exp.DataType.Type.CHAR:
            if self._match(TokenType.CHARACTER_SET):
                to = self.expression(exp.CharacterSet, this=self._parse_var_or_string())
        elif self._match(TokenType.FORMAT):
            fmt_string = self._parse_string()
            fmt = self._parse_at_time_zone(fmt_string)

            if to.this in exp.DataType.TEMPORAL_TYPES:
                this = self.expression(
                    exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime,
                    this=this,
                    format=exp.Literal.string(
                        format_time(
                            fmt_string.this if fmt_string else "",
                            self.FORMAT_MAPPING or self.TIME_MAPPING,
                            self.FORMAT_TRIE or self.TIME_TRIE,
                        )
                    ),
                )

                if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime):
                    this.set("zone", fmt.args["zone"])

                return this

        return self.expression(
            exp.Cast if strict else exp.TryCast, this=this, to=to, format=fmt, safe=safe
        )
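    # Sketch (default dialect; output indicative): the strict flag above selects
    # exp.Cast versus exp.TryCast, so a TRY_CAST call survives a round trip.
    #
    #     >>> import sqlglot
    #     >>> print(sqlglot.parse_one("SELECT TRY_CAST(x AS INT)").sql())
    #     SELECT TRY_CAST(x AS INT)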
    def _parse_concat(self) -> t.Optional[exp.Expression]:
        args = self._parse_csv(self._parse_conjunction)
        if self.CONCAT_NULL_OUTPUTS_STRING:
            args = self._ensure_string_if_null(args)

        # Some dialects (e.g. Trino) don't allow a single-argument CONCAT call, so when
        # we find such a call we replace it with its argument.
        if len(args) == 1:
            return args[0]

        return self.expression(
            exp.Concat if self.STRICT_STRING_CONCAT else exp.SafeConcat, expressions=args
        )

    def _parse_concat_ws(self) -> t.Optional[exp.Expression]:
        args = self._parse_csv(self._parse_conjunction)
        if len(args) < 2:
            return self.expression(exp.ConcatWs, expressions=args)
        delim, *values = args
        if self.CONCAT_NULL_OUTPUTS_STRING:
            values = self._ensure_string_if_null(values)

        return self.expression(exp.ConcatWs, expressions=[delim] + values)

    def _parse_string_agg(self) -> exp.Expression:
        if self._match(TokenType.DISTINCT):
            args: t.List[t.Optional[exp.Expression]] = [
                self.expression(exp.Distinct, expressions=[self._parse_conjunction()])
            ]
            if self._match(TokenType.COMMA):
                args.extend(self._parse_csv(self._parse_conjunction))
        else:
            args = self._parse_csv(self._parse_conjunction)  # type: ignore

        index = self._index
        if not self._match(TokenType.R_PAREN) and args:
            # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]])
            # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n])
            args[-1] = self._parse_limit(this=self._parse_order(this=args[-1]))
            return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1))

        # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]).
        # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that
        # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them.
        if not self._match_text_seq("WITHIN", "GROUP"):
            self._retreat(index)
            return self.validate_expression(exp.GroupConcat.from_arg_list(args), args)

        self._match_l_paren()  # The corresponding match_r_paren will be called in parse_function (caller)
        order = self._parse_order(this=seq_get(args, 0))
        return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1))

    def _parse_convert(
        self, strict: bool, safe: t.Optional[bool] = None
    ) -> t.Optional[exp.Expression]:
        this = self._parse_bitwise()

        if self._match(TokenType.USING):
            to: t.Optional[exp.Expression] = self.expression(
                exp.CharacterSet, this=self._parse_var()
            )
        elif self._match(TokenType.COMMA):
            to = self._parse_types()
        else:
            to = None

        return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, safe=safe)
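    # Sketch of the single-argument CONCAT collapse in _parse_concat above
    # (assuming the default dialect, where CONCAT_NULL_OUTPUTS_STRING is off;
    # output indicative): the call is replaced by its lone argument.
    #
    #     >>> import sqlglot
    #     >>> print(sqlglot.parse_one("SELECT CONCAT(x)").sql())
    #     SELECT x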
4303 """ 4304 args = self._parse_csv(self._parse_conjunction) 4305 4306 if len(args) < 3: 4307 return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1)) 4308 4309 expression, *expressions = args 4310 if not expression: 4311 return None 4312 4313 ifs = [] 4314 for search, result in zip(expressions[::2], expressions[1::2]): 4315 if not search or not result: 4316 return None 4317 4318 if isinstance(search, exp.Literal): 4319 ifs.append( 4320 exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result) 4321 ) 4322 elif isinstance(search, exp.Null): 4323 ifs.append( 4324 exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result) 4325 ) 4326 else: 4327 cond = exp.or_( 4328 exp.EQ(this=expression.copy(), expression=search), 4329 exp.and_( 4330 exp.Is(this=expression.copy(), expression=exp.Null()), 4331 exp.Is(this=search.copy(), expression=exp.Null()), 4332 copy=False, 4333 ), 4334 copy=False, 4335 ) 4336 ifs.append(exp.If(this=cond, true=result)) 4337 4338 return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None) 4339 4340 def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]: 4341 self._match_text_seq("KEY") 4342 key = self._parse_column() 4343 self._match_set((TokenType.COLON, TokenType.COMMA)) 4344 self._match_text_seq("VALUE") 4345 value = self._parse_bitwise() 4346 4347 if not key and not value: 4348 return None 4349 return self.expression(exp.JSONKeyValue, this=key, expression=value) 4350 4351 def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4352 if not this or not self._match_text_seq("FORMAT", "JSON"): 4353 return this 4354 4355 return self.expression(exp.FormatJson, this=this) 4356 4357 def _parse_on_handling(self, on: str, *values: str) -> t.Optional[str]: 4358 # Parses the "X ON Y" syntax, i.e. 
    def _parse_on_handling(self, on: str, *values: str) -> t.Optional[str]:
        # Parses the "X ON Y" syntax, i.e. NULL ON NULL (Oracle, T-SQL)
        for value in values:
            if self._match_text_seq(value, "ON", on):
                return f"{value} ON {on}"

        return None

    def _parse_json_object(self) -> exp.JSONObject:
        star = self._parse_star()
        expressions = (
            [star]
            if star
            else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value()))
        )
        null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT")

        unique_keys = None
        if self._match_text_seq("WITH", "UNIQUE"):
            unique_keys = True
        elif self._match_text_seq("WITHOUT", "UNIQUE"):
            unique_keys = False

        self._match_text_seq("KEYS")

        return_type = self._match_text_seq("RETURNING") and self._parse_format_json(
            self._parse_type()
        )
        encoding = self._match_text_seq("ENCODING") and self._parse_var()

        return self.expression(
            exp.JSONObject,
            expressions=expressions,
            null_handling=null_handling,
            unique_keys=unique_keys,
            return_type=return_type,
            encoding=encoding,
        )

    # Note: this is currently incomplete; it only implements the "JSON_value_column" part
    def _parse_json_column_def(self) -> exp.JSONColumnDef:
        if not self._match_text_seq("NESTED"):
            this = self._parse_id_var()
            kind = self._parse_types(allow_identifiers=False)
            nested = None
        else:
            this = None
            kind = None
            nested = True

        path = self._match_text_seq("PATH") and self._parse_string()
        nested_schema = nested and self._parse_json_schema()

        return self.expression(
            exp.JSONColumnDef,
            this=this,
            kind=kind,
            path=path,
            nested_schema=nested_schema,
        )

    def _parse_json_schema(self) -> exp.JSONSchema:
        self._match_text_seq("COLUMNS")
        return self.expression(
            exp.JSONSchema,
            expressions=self._parse_wrapped_csv(self._parse_json_column_def, optional=True),
        )

    def _parse_json_table(self) -> exp.JSONTable:
        this = self._parse_format_json(self._parse_bitwise())
        path = self._match(TokenType.COMMA) and self._parse_string()
        error_handling = self._parse_on_handling("ERROR", "ERROR", "NULL")
        empty_handling = self._parse_on_handling("EMPTY", "ERROR", "NULL")
        schema = self._parse_json_schema()

        return exp.JSONTable(
            this=this,
            schema=schema,
            path=path,
            error_handling=error_handling,
            empty_handling=empty_handling,
        )

    def _parse_logarithm(self) -> exp.Func:
        # Default argument order is base, expression
        args = self._parse_csv(self._parse_range)

        if len(args) > 1:
            if not self.LOG_BASE_FIRST:
                args.reverse()
            return exp.Log.from_arg_list(args)

        return self.expression(
            exp.Ln if self.LOG_DEFAULTS_TO_LN else exp.Log, this=seq_get(args, 0)
        )
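    # Sketch (default dialect, which per the comment above treats the first LOG
    # argument as the base; output indicative): a two-argument call round-trips.
    #
    #     >>> import sqlglot
    #     >>> print(sqlglot.parse_one("SELECT LOG(10, x)").sql())
    #     SELECT LOG(10, x)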
    def _parse_match_against(self) -> exp.MatchAgainst:
        expressions = self._parse_csv(self._parse_column)

        self._match_text_seq(")", "AGAINST", "(")

        this = self._parse_string()

        if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"):
            modifier = "IN NATURAL LANGUAGE MODE"
            if self._match_text_seq("WITH", "QUERY", "EXPANSION"):
                modifier = f"{modifier} WITH QUERY EXPANSION"
        elif self._match_text_seq("IN", "BOOLEAN", "MODE"):
            modifier = "IN BOOLEAN MODE"
        elif self._match_text_seq("WITH", "QUERY", "EXPANSION"):
            modifier = "WITH QUERY EXPANSION"
        else:
            modifier = None

        return self.expression(
            exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier
        )

    # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16
    def _parse_open_json(self) -> exp.OpenJSON:
        this = self._parse_bitwise()
        path = self._match(TokenType.COMMA) and self._parse_string()

        def _parse_open_json_column_def() -> exp.OpenJSONColumnDef:
            this = self._parse_field(any_token=True)
            kind = self._parse_types()
            path = self._parse_string()
            as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON)

            return self.expression(
                exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json
            )

        expressions = None
        if self._match_pair(TokenType.R_PAREN, TokenType.WITH):
            self._match_l_paren()
            expressions = self._parse_csv(_parse_open_json_column_def)

        return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions)

    def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition:
        args = self._parse_csv(self._parse_bitwise)

        if self._match(TokenType.IN):
            return self.expression(
                exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0)
            )

        if haystack_first:
            haystack = seq_get(args, 0)
            needle = seq_get(args, 1)
        else:
            needle = seq_get(args, 0)
            haystack = seq_get(args, 1)

        return self.expression(
            exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2)
        )

    def _parse_predict(self) -> exp.Predict:
        self._match_text_seq("MODEL")
        this = self._parse_table()

        self._match(TokenType.COMMA)
        self._match_text_seq("TABLE")

        return self.expression(
            exp.Predict,
            this=this,
            expression=self._parse_table(),
            params_struct=self._match(TokenType.COMMA) and self._parse_bitwise(),
        )

    def _parse_join_hint(self, func_name: str) -> exp.JoinHint:
        args = self._parse_csv(self._parse_table)
        return exp.JoinHint(this=func_name.upper(), expressions=args)

    def _parse_substring(self) -> exp.Substring:
        # Postgres supports the form: substring(string [from int] [for int])
        # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6

        args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise))

        if self._match(TokenType.FROM):
            args.append(self._parse_bitwise())
            if self._match(TokenType.FOR):
                args.append(self._parse_bitwise())

        return self.validate_expression(exp.Substring.from_arg_list(args), args)

    def _parse_trim(self) -> exp.Trim:
        # https://www.w3resource.com/sql/character-functions/trim.php
        # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html

        position = None
        collation = None
        expression = None

        if self._match_texts(self.TRIM_TYPES):
            position = self._prev.text.upper()

        this = self._parse_bitwise()
        if self._match_set((TokenType.FROM, TokenType.COMMA)):
            invert_order = self._prev.token_type == TokenType.FROM or self.TRIM_PATTERN_FIRST
            expression = self._parse_bitwise()

            if invert_order:
                this, expression = expression, this

        if self._match(TokenType.COLLATE):
            collation = self._parse_bitwise()

        return self.expression(
            exp.Trim, this=this, position=position, expression=expression, collation=collation
        )
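    # Sketch: _parse_trim above stores the trim position as uppercase text, which
    # can be inspected directly on the node (default dialect).
    #
    #     >>> import sqlglot
    #     >>> from sqlglot import exp
    #     >>> sqlglot.parse_one("SELECT TRIM(LEADING 'x' FROM y)").find(exp.Trim).args["position"]
    #     'LEADING'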
    def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]:
        return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window)

    def _parse_named_window(self) -> t.Optional[exp.Expression]:
        return self._parse_window(self._parse_id_var(), alias=True)

    def _parse_respect_or_ignore_nulls(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        if self._match_text_seq("IGNORE", "NULLS"):
            return self.expression(exp.IgnoreNulls, this=this)
        if self._match_text_seq("RESPECT", "NULLS"):
            return self.expression(exp.RespectNulls, this=this)
        return this
    def _parse_window(
        self, this: t.Optional[exp.Expression], alias: bool = False
    ) -> t.Optional[exp.Expression]:
        if self._match_pair(TokenType.FILTER, TokenType.L_PAREN):
            self._match(TokenType.WHERE)
            this = self.expression(
                exp.Filter, this=this, expression=self._parse_where(skip_where_token=True)
            )
            self._match_r_paren()

        # T-SQL allows the OVER (...) syntax after WITHIN GROUP.
        # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16
        if self._match_text_seq("WITHIN", "GROUP"):
            order = self._parse_wrapped(self._parse_order)
            this = self.expression(exp.WithinGroup, this=this, expression=order)

        # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER
        # Some dialects choose to implement and some do not.
        # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html

        # There is some code above in _parse_lambda that handles
        # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ...

        # The below changes handle
        # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ...

        # Oracle allows both formats
        # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html)
        # and Snowflake chose to do the same for familiarity
        # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes
        this = self._parse_respect_or_ignore_nulls(this)

        # bigquery select from window x AS (partition by ...)
        if alias:
            over = None
            self._match(TokenType.ALIAS)
        elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS):
            return this
        else:
            over = self._prev.text.upper()

        if not self._match(TokenType.L_PAREN):
            return self.expression(
                exp.Window, this=this, alias=self._parse_id_var(False), over=over
            )

        window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS)

        first = self._match(TokenType.FIRST)
        if self._match_text_seq("LAST"):
            first = False

        partition, order = self._parse_partition_and_order()
        kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text

        if kind:
            self._match(TokenType.BETWEEN)
            start = self._parse_window_spec()
            self._match(TokenType.AND)
            end = self._parse_window_spec()

            spec = self.expression(
                exp.WindowSpec,
                kind=kind,
                start=start["value"],
                start_side=start["side"],
                end=end["value"],
                end_side=end["side"],
            )
        else:
            spec = None

        self._match_r_paren()

        window = self.expression(
            exp.Window,
            this=this,
            partition_by=partition,
            order=order,
            spec=spec,
            alias=window_alias,
            over=over,
            first=first,
        )

        # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...)
        if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False):
            return self._parse_window(window, alias=alias)

        return window

    def _parse_partition_and_order(
        self,
    ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]:
        return self._parse_partition_by(), self._parse_order()

    def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]:
        self._match(TokenType.BETWEEN)

        return {
            "value": (
                (self._match_text_seq("UNBOUNDED") and "UNBOUNDED")
                or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW")
                or self._parse_bitwise()
            ),
            "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text,
        }

    def _parse_alias(
        self, this: t.Optional[exp.Expression], explicit: bool = False
    ) -> t.Optional[exp.Expression]:
        any_token = self._match(TokenType.ALIAS)

        if explicit and not any_token:
            return this

        if self._match(TokenType.L_PAREN):
            aliases = self.expression(
                exp.Aliases,
                this=this,
                expressions=self._parse_csv(lambda: self._parse_id_var(any_token)),
            )
            self._match_r_paren(aliases)
            return aliases

        alias = self._parse_id_var(any_token)

        if alias:
            return self.expression(exp.Alias, this=this, alias=alias)

        return this

    def _parse_id_var(
        self,
        any_token: bool = True,
        tokens: t.Optional[t.Collection[TokenType]] = None,
    ) -> t.Optional[exp.Expression]:
        identifier = self._parse_identifier()

        if identifier:
            return identifier

        if (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS):
            quoted = self._prev.token_type == TokenType.STRING
            return exp.Identifier(this=self._prev.text, quoted=quoted)

        return None

    def _parse_string(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.STRING):
            return self.PRIMARY_PARSERS[TokenType.STRING](self, self._prev)
        return self._parse_placeholder()
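    # Sketch (default dialect): a standard OVER clause is assembled into an
    # exp.Window with partition, order and an optional frame spec, and it
    # round-trips unchanged.
    #
    #     >>> import sqlglot
    #     >>> print(sqlglot.parse_one("SELECT SUM(x) OVER (PARTITION BY y ORDER BY z) FROM t").sql())
    #     SELECT SUM(x) OVER (PARTITION BY y ORDER BY z) FROM t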
    def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]:
        return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True)

    def _parse_number(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.NUMBER):
            return self.PRIMARY_PARSERS[TokenType.NUMBER](self, self._prev)
        return self._parse_placeholder()

    def _parse_identifier(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.IDENTIFIER):
            return self.expression(exp.Identifier, this=self._prev.text, quoted=True)
        return self._parse_placeholder()

    def _parse_var(
        self, any_token: bool = False, tokens: t.Optional[t.Collection[TokenType]] = None
    ) -> t.Optional[exp.Expression]:
        if (
            (any_token and self._advance_any())
            or self._match(TokenType.VAR)
            or (self._match_set(tokens) if tokens else False)
        ):
            return self.expression(exp.Var, this=self._prev.text)
        return self._parse_placeholder()

    def _advance_any(self) -> t.Optional[Token]:
        if self._curr and self._curr.token_type not in self.RESERVED_KEYWORDS:
            self._advance()
            return self._prev
        return None

    def _parse_var_or_string(self) -> t.Optional[exp.Expression]:
        return self._parse_var() or self._parse_string()

    def _parse_null(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.NULL_TOKENS):
            return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev)
        return self._parse_placeholder()

    def _parse_boolean(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.TRUE):
            return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev)
        if self._match(TokenType.FALSE):
            return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev)
        return self._parse_placeholder()

    def _parse_star(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.STAR):
            return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev)
        return self._parse_placeholder()

    def _parse_parameter(self) -> exp.Parameter:
        wrapped = self._match(TokenType.L_BRACE)
        this = self._parse_var() or self._parse_identifier() or self._parse_primary()
        self._match(TokenType.R_BRACE)
        return self.expression(exp.Parameter, this=this, wrapped=wrapped)

    def _parse_placeholder(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.PLACEHOLDER_PARSERS):
            placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self)
            if placeholder:
                return placeholder
            self._advance(-1)
        return None

    def _parse_except(self) -> t.Optional[t.List[exp.Expression]]:
        if not self._match(TokenType.EXCEPT):
            return None
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_column)

        except_column = self._parse_column()
        return [except_column] if except_column else None

    def _parse_replace(self) -> t.Optional[t.List[exp.Expression]]:
        if not self._match(TokenType.REPLACE):
            return None
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_expression)

        replace_expression = self._parse_expression()
        return [replace_expression] if replace_expression else None
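    # Sketch: _parse_except above backs the star modifier used by dialects such
    # as BigQuery, e.g. SELECT * EXCEPT (...). A round trip through that dialect
    # (output indicative):
    #
    #     >>> import sqlglot
    #     >>> print(sqlglot.transpile("SELECT * EXCEPT (a) FROM t", read="bigquery")[0])
    #     SELECT * EXCEPT (a) FROM t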
    def _parse_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA
    ) -> t.List[exp.Expression]:
        parse_result = parse_method()
        items = [parse_result] if parse_result is not None else []

        while self._match(sep):
            self._add_comments(parse_result)
            parse_result = parse_method()
            if parse_result is not None:
                items.append(parse_result)

        return items

    def _parse_tokens(
        self, parse_method: t.Callable, expressions: t.Dict
    ) -> t.Optional[exp.Expression]:
        this = parse_method()

        while self._match_set(expressions):
            this = self.expression(
                expressions[self._prev.token_type],
                this=this,
                comments=self._prev_comments,
                expression=parse_method(),
            )

        return this

    def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]:
        return self._parse_wrapped_csv(self._parse_id_var, optional=optional)

    def _parse_wrapped_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False
    ) -> t.List[exp.Expression]:
        return self._parse_wrapped(
            lambda: self._parse_csv(parse_method, sep=sep), optional=optional
        )

    def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any:
        wrapped = self._match(TokenType.L_PAREN)
        if not wrapped and not optional:
            self.raise_error("Expecting (")
        parse_result = parse_method()
        if wrapped:
            self._match_r_paren()
        return parse_result

    def _parse_expressions(self) -> t.List[exp.Expression]:
        return self._parse_csv(self._parse_expression)

    def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]:
        return self._parse_select() or self._parse_set_operations(
            self._parse_expression() if alias else self._parse_conjunction()
        )

    def _parse_ddl_select(self) -> t.Optional[exp.Expression]:
        return self._parse_query_modifiers(
            self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False))
        )

    def _parse_transaction(self) -> exp.Transaction | exp.Command:
        this = None
        if self._match_texts(self.TRANSACTION_KIND):
            this = self._prev.text

        self._match_texts({"TRANSACTION", "WORK"})

        modes = []
        while True:
            mode = []
            while self._match(TokenType.VAR):
                mode.append(self._prev.text)

            if mode:
                modes.append(" ".join(mode))
            if not self._match(TokenType.COMMA):
                break

        return self.expression(exp.Transaction, this=this, modes=modes)

    def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback:
        chain = None
        savepoint = None
        is_rollback = self._prev.token_type == TokenType.ROLLBACK

        self._match_texts({"TRANSACTION", "WORK"})

        if self._match_text_seq("TO"):
            self._match_text_seq("SAVEPOINT")
            savepoint = self._parse_id_var()

        if self._match(TokenType.AND):
            chain = not self._match_text_seq("NO")
            self._match_text_seq("CHAIN")

        if is_rollback:
            return self.expression(exp.Rollback, savepoint=savepoint)

        return self.expression(exp.Commit, chain=chain)
    def _parse_add_column(self) -> t.Optional[exp.Expression]:
        if not self._match_text_seq("ADD"):
            return None

        self._match(TokenType.COLUMN)
        exists_column = self._parse_exists(not_=True)
        expression = self._parse_field_def()

        if expression:
            expression.set("exists", exists_column)

            # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns
            if self._match_texts(("FIRST", "AFTER")):
                position = self._prev.text
                column_position = self.expression(
                    exp.ColumnPosition, this=self._parse_column(), position=position
                )
                expression.set("position", column_position)

        return expression

    def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]:
        drop = self._match(TokenType.DROP) and self._parse_drop()
        if drop and not isinstance(drop, exp.Command):
            drop.set("kind", drop.args.get("kind", "COLUMN"))
        return drop

    # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html
    def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition:
        return self.expression(
            exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists
        )

    def _parse_add_constraint(self) -> exp.AddConstraint:
        this = None
        kind = self._prev.token_type

        if kind == TokenType.CONSTRAINT:
            this = self._parse_id_var()

            if self._match_text_seq("CHECK"):
                expression = self._parse_wrapped(self._parse_conjunction)
                enforced = self._match_text_seq("ENFORCED")

                return self.expression(
                    exp.AddConstraint, this=this, expression=expression, enforced=enforced
                )

        if kind == TokenType.FOREIGN_KEY or self._match(TokenType.FOREIGN_KEY):
            expression = self._parse_foreign_key()
        elif kind == TokenType.PRIMARY_KEY or self._match(TokenType.PRIMARY_KEY):
            expression = self._parse_primary_key()
        else:
            expression = None

        return self.expression(exp.AddConstraint, this=this, expression=expression)

    def _parse_alter_table_add(self) -> t.List[exp.Expression]:
        index = self._index - 1

        if self._match_set(self.ADD_CONSTRAINT_TOKENS):
            return self._parse_csv(self._parse_add_constraint)

        self._retreat(index)
        if not self.ALTER_TABLE_ADD_COLUMN_KEYWORD and self._match_text_seq("ADD"):
            return self._parse_csv(self._parse_field_def)

        return self._parse_csv(self._parse_add_column)

    def _parse_alter_table_alter(self) -> exp.AlterColumn:
        self._match(TokenType.COLUMN)
        column = self._parse_field(any_token=True)

        if self._match_pair(TokenType.DROP, TokenType.DEFAULT):
            return self.expression(exp.AlterColumn, this=column, drop=True)
        if self._match_pair(TokenType.SET, TokenType.DEFAULT):
            return self.expression(exp.AlterColumn, this=column, default=self._parse_conjunction())

        self._match_text_seq("SET", "DATA")
        return self.expression(
            exp.AlterColumn,
            this=column,
            dtype=self._match_text_seq("TYPE") and self._parse_types(),
            collate=self._match(TokenType.COLLATE) and self._parse_term(),
            using=self._match(TokenType.USING) and self._parse_conjunction(),
        )

    def _parse_alter_table_drop(self) -> t.List[exp.Expression]:
        index = self._index - 1

        partition_exists = self._parse_exists()
        if self._match(TokenType.PARTITION, advance=False):
            return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists))

        self._retreat(index)
        return self._parse_csv(self._parse_drop_column)

    def _parse_alter_table_rename(self) -> exp.RenameTable:
        self._match_text_seq("TO")
        return self.expression(exp.RenameTable, this=self._parse_table(schema=True))
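    # Sketch (default dialect; output indicative): ALTER TABLE actions are
    # dispatched through ALTER_PARSERS (see _parse_alter below), and a simple
    # ADD COLUMN round-trips.
    #
    #     >>> import sqlglot
    #     >>> print(sqlglot.parse_one("ALTER TABLE t ADD COLUMN c INT").sql())
    #     ALTER TABLE t ADD COLUMN c INT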
self._match_text_seq("ONLY") 5033 this = self._parse_table(schema=True) 5034 5035 if self._next: 5036 self._advance() 5037 5038 parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None 5039 if parser: 5040 actions = ensure_list(parser(self)) 5041 5042 if not self._curr: 5043 return self.expression( 5044 exp.AlterTable, 5045 this=this, 5046 exists=exists, 5047 actions=actions, 5048 only=only, 5049 ) 5050 5051 return self._parse_as_command(start) 5052 5053 def _parse_merge(self) -> exp.Merge: 5054 self._match(TokenType.INTO) 5055 target = self._parse_table() 5056 5057 if target and self._match(TokenType.ALIAS, advance=False): 5058 target.set("alias", self._parse_table_alias()) 5059 5060 self._match(TokenType.USING) 5061 using = self._parse_table() 5062 5063 self._match(TokenType.ON) 5064 on = self._parse_conjunction() 5065 5066 return self.expression( 5067 exp.Merge, 5068 this=target, 5069 using=using, 5070 on=on, 5071 expressions=self._parse_when_matched(), 5072 ) 5073 5074 def _parse_when_matched(self) -> t.List[exp.When]: 5075 whens = [] 5076 5077 while self._match(TokenType.WHEN): 5078 matched = not self._match(TokenType.NOT) 5079 self._match_text_seq("MATCHED") 5080 source = ( 5081 False 5082 if self._match_text_seq("BY", "TARGET") 5083 else self._match_text_seq("BY", "SOURCE") 5084 ) 5085 condition = self._parse_conjunction() if self._match(TokenType.AND) else None 5086 5087 self._match(TokenType.THEN) 5088 5089 if self._match(TokenType.INSERT): 5090 _this = self._parse_star() 5091 if _this: 5092 then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=_this) 5093 else: 5094 then = self.expression( 5095 exp.Insert, 5096 this=self._parse_value(), 5097 expression=self._match(TokenType.VALUES) and self._parse_value(), 5098 ) 5099 elif self._match(TokenType.UPDATE): 5100 expressions = self._parse_star() 5101 if expressions: 5102 then = self.expression(exp.Update, expressions=expressions) 5103 else: 5104 then = self.expression( 5105 exp.Update, 5106 expressions=self._match(TokenType.SET) 5107 and self._parse_csv(self._parse_equality), 5108 ) 5109 elif self._match(TokenType.DELETE): 5110 then = self.expression(exp.Var, this=self._prev.text) 5111 else: 5112 then = None 5113 5114 whens.append( 5115 self.expression( 5116 exp.When, 5117 matched=matched, 5118 source=source, 5119 condition=condition, 5120 then=then, 5121 ) 5122 ) 5123 return whens 5124 5125 def _parse_show(self) -> t.Optional[exp.Expression]: 5126 parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE) 5127 if parser: 5128 return parser(self) 5129 return self._parse_as_command(self._prev) 5130 5131 def _parse_set_item_assignment( 5132 self, kind: t.Optional[str] = None 5133 ) -> t.Optional[exp.Expression]: 5134 index = self._index 5135 5136 if kind in {"GLOBAL", "SESSION"} and self._match_text_seq("TRANSACTION"): 5137 return self._parse_set_transaction(global_=kind == "GLOBAL") 5138 5139 left = self._parse_primary() or self._parse_id_var() 5140 assignment_delimiter = self._match_texts(("=", "TO")) 5141 5142 if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter): 5143 self._retreat(index) 5144 return None 5145 5146 right = self._parse_statement() or self._parse_id_var() 5147 this = self.expression(exp.EQ, this=left, expression=right) 5148 5149 return self.expression(exp.SetItem, this=this, kind=kind) 5150 5151 def _parse_set_transaction(self, global_: bool = False) -> exp.Expression: 5152 self._match_text_seq("TRANSACTION") 5153 characteristics = 
    def _parse_set_transaction(self, global_: bool = False) -> exp.Expression:
        self._match_text_seq("TRANSACTION")
        characteristics = self._parse_csv(
            lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS)
        )
        return self.expression(
            exp.SetItem,
            expressions=characteristics,
            kind="TRANSACTION",
            **{"global": global_},  # type: ignore
        )

    def _parse_set_item(self) -> t.Optional[exp.Expression]:
        parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE)
        return parser(self) if parser else self._parse_set_item_assignment(kind=None)

    def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command:
        index = self._index
        set_ = self.expression(
            exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag
        )

        if self._curr:
            self._retreat(index)
            return self._parse_as_command(self._prev)

        return set_

    def _parse_var_from_options(self, options: t.Collection[str]) -> t.Optional[exp.Var]:
        for option in options:
            if self._match_text_seq(*option.split(" ")):
                return exp.var(option)
        return None

    def _parse_as_command(self, start: Token) -> exp.Command:
        while self._curr:
            self._advance()
        text = self._find_sql(start, self._prev)
        size = len(start.text)
        return exp.Command(this=text[:size], expression=text[size:])

    def _parse_dict_property(self, this: str) -> exp.DictProperty:
        settings = []

        self._match_l_paren()
        kind = self._parse_id_var()

        if self._match(TokenType.L_PAREN):
            while True:
                key = self._parse_id_var()
                value = self._parse_primary()

                if not key and value is None:
                    break
                settings.append(self.expression(exp.DictSubProperty, this=key, value=value))
            self._match(TokenType.R_PAREN)

        self._match_r_paren()

        return self.expression(
            exp.DictProperty,
            this=this,
            kind=kind.this if kind else None,
            settings=settings,
        )

    def _parse_dict_range(self, this: str) -> exp.DictRange:
        self._match_l_paren()
        has_min = self._match_text_seq("MIN")
        if has_min:
            min = self._parse_var() or self._parse_primary()
            self._match_text_seq("MAX")
            max = self._parse_var() or self._parse_primary()
        else:
            max = self._parse_var() or self._parse_primary()
            min = exp.Literal.number(0)
        self._match_r_paren()
        return self.expression(exp.DictRange, this=this, min=min, max=max)

    def _parse_comprehension(self, this: exp.Expression) -> t.Optional[exp.Comprehension]:
        index = self._index
        expression = self._parse_column()
        if not self._match(TokenType.IN):
            self._retreat(index - 1)
            return None
        iterator = self._parse_column()
        condition = self._parse_conjunction() if self._match_text_seq("IF") else None
        return self.expression(
            exp.Comprehension,
            this=this,
            expression=expression,
            iterator=iterator,
            condition=condition,
        )
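    # Sketch (default dialect; output indicative): a bare assignment goes through
    # _parse_set_item_assignment and round-trips as a SET statement.
    #
    #     >>> import sqlglot
    #     >>> print(sqlglot.parse_one("SET x = 1").sql())
    #     SET x = 1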
    def _match(self, token_type, advance=True, expression=None):
        if not self._curr:
            return None

        if self._curr.token_type == token_type:
            if advance:
                self._advance()
            self._add_comments(expression)
            return True

        return None

    def _match_set(self, types, advance=True):
        if not self._curr:
            return None

        if self._curr.token_type in types:
            if advance:
                self._advance()
            return True

        return None

    def _match_pair(self, token_type_a, token_type_b, advance=True):
        if not self._curr or not self._next:
            return None

        if self._curr.token_type == token_type_a and self._next.token_type == token_type_b:
            if advance:
                self._advance(2)
            return True

        return None

    def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None:
        if not self._match(TokenType.L_PAREN, expression=expression):
            self.raise_error("Expecting (")

    def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None:
        if not self._match(TokenType.R_PAREN, expression=expression):
            self.raise_error("Expecting )")

    def _match_texts(self, texts, advance=True):
        if self._curr and self._curr.text.upper() in texts:
            if advance:
                self._advance()
            return True
        return False

    def _match_text_seq(self, *texts, advance=True):
        index = self._index
        for text in texts:
            if self._curr and self._curr.text.upper() == text:
                self._advance()
            else:
                self._retreat(index)
                return False

        if not advance:
            self._retreat(index)

        return True

    @t.overload
    def _replace_columns_with_dots(self, this: exp.Expression) -> exp.Expression:
        ...

    @t.overload
    def _replace_columns_with_dots(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        ...

    def _replace_columns_with_dots(self, this):
        if isinstance(this, exp.Dot):
            exp.replace_children(this, self._replace_columns_with_dots)
        elif isinstance(this, exp.Column):
            exp.replace_children(this, self._replace_columns_with_dots)
            table = this.args.get("table")
            this = (
                self.expression(exp.Dot, this=table, expression=this.this) if table else this.this
            )

        return this

    def _replace_lambda(
        self, node: t.Optional[exp.Expression], lambda_variables: t.Set[str]
    ) -> t.Optional[exp.Expression]:
        if not node:
            return node

        for column in node.find_all(exp.Column):
            if column.parts[0].name in lambda_variables:
                dot_or_id = column.to_dot() if column.table else column.this
                parent = column.parent

                while isinstance(parent, exp.Dot):
                    if not isinstance(parent.parent, exp.Dot):
                        parent.replace(dot_or_id)
                        break
                    parent = parent.parent
                else:
                    if column is node:
                        node = dot_or_id
                    else:
                        column.replace(dot_or_id)
        return node

    def _ensure_string_if_null(self, values: t.List[exp.Expression]) -> t.List[exp.Expression]:
        return [
            exp.func("COALESCE", exp.cast(value, "text"), exp.Literal.string(""))
            for value in values
            if value
        ]
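# Illustrative usage sketch (not part of the source): the low-level flow pairs a
# Tokenizer with a Parser, matching the parse() signature documented above. The
# SQL string is an assumed example input:
#
#   from sqlglot import exp
#   from sqlglot.parser import Parser
#   from sqlglot.tokens import Tokenizer
#
#   sql = "SELECT a FROM t WHERE b > 1"
#   tokens = Tokenizer().tokenize(sql)
#   expressions = Parser().parse(tokens, sql)
#   assert isinstance(expressions[0], exp.Select)  # one tree per statement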
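# Illustrative sketch (not part of the source): SET statements route through
# SET_PARSERS and _parse_set_transaction, so a characteristic listed in
# TRANSACTION_CHARACTERISTICS round-trips as an exp.Set node:
#
#   import sqlglot
#
#   node = sqlglot.parse_one("SET TRANSACTION ISOLATION LEVEL READ COMMITTED")
#   assert isinstance(node, sqlglot.exp.Set)  # expected to hold for this input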
TokenType.PSEUDO_TYPE, 193 TokenType.SUPER, 194 TokenType.SERIAL, 195 TokenType.SMALLSERIAL, 196 TokenType.BIGSERIAL, 197 TokenType.XML, 198 TokenType.YEAR, 199 TokenType.UNIQUEIDENTIFIER, 200 TokenType.USERDEFINED, 201 TokenType.MONEY, 202 TokenType.SMALLMONEY, 203 TokenType.ROWVERSION, 204 TokenType.IMAGE, 205 TokenType.VARIANT, 206 TokenType.OBJECT, 207 TokenType.OBJECT_IDENTIFIER, 208 TokenType.INET, 209 TokenType.IPADDRESS, 210 TokenType.IPPREFIX, 211 TokenType.UNKNOWN, 212 TokenType.NULL, 213 *ENUM_TYPE_TOKENS, 214 *NESTED_TYPE_TOKENS, 215 } 216 217 SIGNED_TO_UNSIGNED_TYPE_TOKEN = { 218 TokenType.BIGINT: TokenType.UBIGINT, 219 TokenType.INT: TokenType.UINT, 220 TokenType.MEDIUMINT: TokenType.UMEDIUMINT, 221 TokenType.SMALLINT: TokenType.USMALLINT, 222 TokenType.TINYINT: TokenType.UTINYINT, 223 TokenType.DECIMAL: TokenType.UDECIMAL, 224 } 225 226 SUBQUERY_PREDICATES = { 227 TokenType.ANY: exp.Any, 228 TokenType.ALL: exp.All, 229 TokenType.EXISTS: exp.Exists, 230 TokenType.SOME: exp.Any, 231 } 232 233 RESERVED_KEYWORDS = { 234 *Tokenizer.SINGLE_TOKENS.values(), 235 TokenType.SELECT, 236 } 237 238 DB_CREATABLES = { 239 TokenType.DATABASE, 240 TokenType.SCHEMA, 241 TokenType.TABLE, 242 TokenType.VIEW, 243 TokenType.MODEL, 244 TokenType.DICTIONARY, 245 } 246 247 CREATABLES = { 248 TokenType.COLUMN, 249 TokenType.FUNCTION, 250 TokenType.INDEX, 251 TokenType.PROCEDURE, 252 *DB_CREATABLES, 253 } 254 255 # Tokens that can represent identifiers 256 ID_VAR_TOKENS = { 257 TokenType.VAR, 258 TokenType.ANTI, 259 TokenType.APPLY, 260 TokenType.ASC, 261 TokenType.AUTO_INCREMENT, 262 TokenType.BEGIN, 263 TokenType.CACHE, 264 TokenType.CASE, 265 TokenType.COLLATE, 266 TokenType.COMMAND, 267 TokenType.COMMENT, 268 TokenType.COMMIT, 269 TokenType.CONSTRAINT, 270 TokenType.DEFAULT, 271 TokenType.DELETE, 272 TokenType.DESC, 273 TokenType.DESCRIBE, 274 TokenType.DICTIONARY, 275 TokenType.DIV, 276 TokenType.END, 277 TokenType.EXECUTE, 278 TokenType.ESCAPE, 279 TokenType.FALSE, 280 TokenType.FIRST, 281 TokenType.FILTER, 282 TokenType.FORMAT, 283 TokenType.FULL, 284 TokenType.IS, 285 TokenType.ISNULL, 286 TokenType.INTERVAL, 287 TokenType.KEEP, 288 TokenType.KILL, 289 TokenType.LEFT, 290 TokenType.LOAD, 291 TokenType.MERGE, 292 TokenType.NATURAL, 293 TokenType.NEXT, 294 TokenType.OFFSET, 295 TokenType.ORDINALITY, 296 TokenType.OVERLAPS, 297 TokenType.OVERWRITE, 298 TokenType.PARTITION, 299 TokenType.PERCENT, 300 TokenType.PIVOT, 301 TokenType.PRAGMA, 302 TokenType.RANGE, 303 TokenType.REFERENCES, 304 TokenType.RIGHT, 305 TokenType.ROW, 306 TokenType.ROWS, 307 TokenType.SEMI, 308 TokenType.SET, 309 TokenType.SETTINGS, 310 TokenType.SHOW, 311 TokenType.TEMPORARY, 312 TokenType.TOP, 313 TokenType.TRUE, 314 TokenType.UNIQUE, 315 TokenType.UNPIVOT, 316 TokenType.UPDATE, 317 TokenType.USE, 318 TokenType.VOLATILE, 319 TokenType.WINDOW, 320 *CREATABLES, 321 *SUBQUERY_PREDICATES, 322 *TYPE_TOKENS, 323 *NO_PAREN_FUNCTIONS, 324 } 325 326 INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END} 327 328 TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - { 329 TokenType.ANTI, 330 TokenType.APPLY, 331 TokenType.ASOF, 332 TokenType.FULL, 333 TokenType.LEFT, 334 TokenType.LOCK, 335 TokenType.NATURAL, 336 TokenType.OFFSET, 337 TokenType.RIGHT, 338 TokenType.SEMI, 339 TokenType.WINDOW, 340 } 341 342 COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS} 343 344 UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET} 345 346 TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"} 347 348 FUNC_TOKENS = { 349 TokenType.COLLATE, 350 TokenType.COMMAND, 
351 TokenType.CURRENT_DATE, 352 TokenType.CURRENT_DATETIME, 353 TokenType.CURRENT_TIMESTAMP, 354 TokenType.CURRENT_TIME, 355 TokenType.CURRENT_USER, 356 TokenType.FILTER, 357 TokenType.FIRST, 358 TokenType.FORMAT, 359 TokenType.GLOB, 360 TokenType.IDENTIFIER, 361 TokenType.INDEX, 362 TokenType.ISNULL, 363 TokenType.ILIKE, 364 TokenType.INSERT, 365 TokenType.LIKE, 366 TokenType.MERGE, 367 TokenType.OFFSET, 368 TokenType.PRIMARY_KEY, 369 TokenType.RANGE, 370 TokenType.REPLACE, 371 TokenType.RLIKE, 372 TokenType.ROW, 373 TokenType.UNNEST, 374 TokenType.VAR, 375 TokenType.LEFT, 376 TokenType.RIGHT, 377 TokenType.DATE, 378 TokenType.DATETIME, 379 TokenType.TABLE, 380 TokenType.TIMESTAMP, 381 TokenType.TIMESTAMPTZ, 382 TokenType.WINDOW, 383 TokenType.XOR, 384 *TYPE_TOKENS, 385 *SUBQUERY_PREDICATES, 386 } 387 388 CONJUNCTION = { 389 TokenType.AND: exp.And, 390 TokenType.OR: exp.Or, 391 } 392 393 EQUALITY = { 394 TokenType.EQ: exp.EQ, 395 TokenType.NEQ: exp.NEQ, 396 TokenType.NULLSAFE_EQ: exp.NullSafeEQ, 397 } 398 399 COMPARISON = { 400 TokenType.GT: exp.GT, 401 TokenType.GTE: exp.GTE, 402 TokenType.LT: exp.LT, 403 TokenType.LTE: exp.LTE, 404 } 405 406 BITWISE = { 407 TokenType.AMP: exp.BitwiseAnd, 408 TokenType.CARET: exp.BitwiseXor, 409 TokenType.PIPE: exp.BitwiseOr, 410 TokenType.DPIPE: exp.DPipe, 411 } 412 413 TERM = { 414 TokenType.DASH: exp.Sub, 415 TokenType.PLUS: exp.Add, 416 TokenType.MOD: exp.Mod, 417 TokenType.COLLATE: exp.Collate, 418 } 419 420 FACTOR = { 421 TokenType.DIV: exp.IntDiv, 422 TokenType.LR_ARROW: exp.Distance, 423 TokenType.SLASH: exp.Div, 424 TokenType.STAR: exp.Mul, 425 } 426 427 TIMES = { 428 TokenType.TIME, 429 TokenType.TIMETZ, 430 } 431 432 TIMESTAMPS = { 433 TokenType.TIMESTAMP, 434 TokenType.TIMESTAMPTZ, 435 TokenType.TIMESTAMPLTZ, 436 *TIMES, 437 } 438 439 SET_OPERATIONS = { 440 TokenType.UNION, 441 TokenType.INTERSECT, 442 TokenType.EXCEPT, 443 } 444 445 JOIN_METHODS = { 446 TokenType.NATURAL, 447 TokenType.ASOF, 448 } 449 450 JOIN_SIDES = { 451 TokenType.LEFT, 452 TokenType.RIGHT, 453 TokenType.FULL, 454 } 455 456 JOIN_KINDS = { 457 TokenType.INNER, 458 TokenType.OUTER, 459 TokenType.CROSS, 460 TokenType.SEMI, 461 TokenType.ANTI, 462 } 463 464 JOIN_HINTS: t.Set[str] = set() 465 466 LAMBDAS = { 467 TokenType.ARROW: lambda self, expressions: self.expression( 468 exp.Lambda, 469 this=self._replace_lambda( 470 self._parse_conjunction(), 471 {node.name for node in expressions}, 472 ), 473 expressions=expressions, 474 ), 475 TokenType.FARROW: lambda self, expressions: self.expression( 476 exp.Kwarg, 477 this=exp.var(expressions[0].name), 478 expression=self._parse_conjunction(), 479 ), 480 } 481 482 COLUMN_OPERATORS = { 483 TokenType.DOT: None, 484 TokenType.DCOLON: lambda self, this, to: self.expression( 485 exp.Cast if self.STRICT_CAST else exp.TryCast, 486 this=this, 487 to=to, 488 ), 489 TokenType.ARROW: lambda self, this, path: self.expression( 490 exp.JSONExtract, 491 this=this, 492 expression=path, 493 ), 494 TokenType.DARROW: lambda self, this, path: self.expression( 495 exp.JSONExtractScalar, 496 this=this, 497 expression=path, 498 ), 499 TokenType.HASH_ARROW: lambda self, this, path: self.expression( 500 exp.JSONBExtract, 501 this=this, 502 expression=path, 503 ), 504 TokenType.DHASH_ARROW: lambda self, this, path: self.expression( 505 exp.JSONBExtractScalar, 506 this=this, 507 expression=path, 508 ), 509 TokenType.PLACEHOLDER: lambda self, this, key: self.expression( 510 exp.JSONBContains, 511 this=this, 512 expression=key, 513 ), 514 } 515 516 
EXPRESSION_PARSERS = { 517 exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 518 exp.Column: lambda self: self._parse_column(), 519 exp.Condition: lambda self: self._parse_conjunction(), 520 exp.DataType: lambda self: self._parse_types(allow_identifiers=False), 521 exp.Expression: lambda self: self._parse_statement(), 522 exp.From: lambda self: self._parse_from(), 523 exp.Group: lambda self: self._parse_group(), 524 exp.Having: lambda self: self._parse_having(), 525 exp.Identifier: lambda self: self._parse_id_var(), 526 exp.Join: lambda self: self._parse_join(), 527 exp.Lambda: lambda self: self._parse_lambda(), 528 exp.Lateral: lambda self: self._parse_lateral(), 529 exp.Limit: lambda self: self._parse_limit(), 530 exp.Offset: lambda self: self._parse_offset(), 531 exp.Order: lambda self: self._parse_order(), 532 exp.Ordered: lambda self: self._parse_ordered(), 533 exp.Properties: lambda self: self._parse_properties(), 534 exp.Qualify: lambda self: self._parse_qualify(), 535 exp.Returning: lambda self: self._parse_returning(), 536 exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY), 537 exp.Table: lambda self: self._parse_table_parts(), 538 exp.TableAlias: lambda self: self._parse_table_alias(), 539 exp.Where: lambda self: self._parse_where(), 540 exp.Window: lambda self: self._parse_named_window(), 541 exp.With: lambda self: self._parse_with(), 542 "JOIN_TYPE": lambda self: self._parse_join_parts(), 543 } 544 545 STATEMENT_PARSERS = { 546 TokenType.ALTER: lambda self: self._parse_alter(), 547 TokenType.BEGIN: lambda self: self._parse_transaction(), 548 TokenType.CACHE: lambda self: self._parse_cache(), 549 TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(), 550 TokenType.COMMENT: lambda self: self._parse_comment(), 551 TokenType.CREATE: lambda self: self._parse_create(), 552 TokenType.DELETE: lambda self: self._parse_delete(), 553 TokenType.DESC: lambda self: self._parse_describe(), 554 TokenType.DESCRIBE: lambda self: self._parse_describe(), 555 TokenType.DROP: lambda self: self._parse_drop(), 556 TokenType.INSERT: lambda self: self._parse_insert(), 557 TokenType.KILL: lambda self: self._parse_kill(), 558 TokenType.LOAD: lambda self: self._parse_load(), 559 TokenType.MERGE: lambda self: self._parse_merge(), 560 TokenType.PIVOT: lambda self: self._parse_simplified_pivot(), 561 TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()), 562 TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(), 563 TokenType.SET: lambda self: self._parse_set(), 564 TokenType.UNCACHE: lambda self: self._parse_uncache(), 565 TokenType.UPDATE: lambda self: self._parse_update(), 566 TokenType.USE: lambda self: self.expression( 567 exp.Use, 568 kind=self._match_texts(("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA")) 569 and exp.var(self._prev.text), 570 this=self._parse_table(schema=False), 571 ), 572 } 573 574 UNARY_PARSERS = { 575 TokenType.PLUS: lambda self: self._parse_unary(), # Unary + is handled as a no-op 576 TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()), 577 TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()), 578 TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()), 579 } 580 581 PRIMARY_PARSERS = { 582 TokenType.STRING: lambda self, token: self.expression( 583 exp.Literal, this=token.text, is_string=True 584 ), 585 TokenType.NUMBER: lambda self, token: self.expression( 586 exp.Literal, this=token.text, 
is_string=False 587 ), 588 TokenType.STAR: lambda self, _: self.expression( 589 exp.Star, **{"except": self._parse_except(), "replace": self._parse_replace()} 590 ), 591 TokenType.NULL: lambda self, _: self.expression(exp.Null), 592 TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True), 593 TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False), 594 TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text), 595 TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text), 596 TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text), 597 TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token), 598 TokenType.NATIONAL_STRING: lambda self, token: self.expression( 599 exp.National, this=token.text 600 ), 601 TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text), 602 TokenType.HEREDOC_STRING: lambda self, token: self.expression( 603 exp.RawString, this=token.text 604 ), 605 TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(), 606 } 607 608 PLACEHOLDER_PARSERS = { 609 TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder), 610 TokenType.PARAMETER: lambda self: self._parse_parameter(), 611 TokenType.COLON: lambda self: self.expression(exp.Placeholder, this=self._prev.text) 612 if self._match(TokenType.NUMBER) or self._match_set(self.ID_VAR_TOKENS) 613 else None, 614 } 615 616 RANGE_PARSERS = { 617 TokenType.BETWEEN: lambda self, this: self._parse_between(this), 618 TokenType.GLOB: binary_range_parser(exp.Glob), 619 TokenType.ILIKE: binary_range_parser(exp.ILike), 620 TokenType.IN: lambda self, this: self._parse_in(this), 621 TokenType.IRLIKE: binary_range_parser(exp.RegexpILike), 622 TokenType.IS: lambda self, this: self._parse_is(this), 623 TokenType.LIKE: binary_range_parser(exp.Like), 624 TokenType.OVERLAPS: binary_range_parser(exp.Overlaps), 625 TokenType.RLIKE: binary_range_parser(exp.RegexpLike), 626 TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo), 627 TokenType.FOR: lambda self, this: self._parse_comprehension(this), 628 } 629 630 PROPERTY_PARSERS: t.Dict[str, t.Callable] = { 631 "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty), 632 "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty), 633 "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(), 634 "CHARSET": lambda self, **kwargs: self._parse_character_set(**kwargs), 635 "CHARACTER SET": lambda self, **kwargs: self._parse_character_set(**kwargs), 636 "CHECKSUM": lambda self: self._parse_checksum(), 637 "CLUSTER BY": lambda self: self._parse_cluster(), 638 "CLUSTERED": lambda self: self._parse_clustered_by(), 639 "COLLATE": lambda self, **kwargs: self._parse_property_assignment( 640 exp.CollateProperty, **kwargs 641 ), 642 "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty), 643 "COPY": lambda self: self._parse_copy_property(), 644 "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs), 645 "DEFINER": lambda self: self._parse_definer(), 646 "DETERMINISTIC": lambda self: self.expression( 647 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 648 ), 649 "DISTKEY": lambda self: self._parse_distkey(), 650 "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty), 651 "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty), 652 "EXECUTE": 
lambda self: self._parse_property_assignment(exp.ExecuteAsProperty), 653 "EXTERNAL": lambda self: self.expression(exp.ExternalProperty), 654 "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs), 655 "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 656 "FREESPACE": lambda self: self._parse_freespace(), 657 "HEAP": lambda self: self.expression(exp.HeapProperty), 658 "IMMUTABLE": lambda self: self.expression( 659 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 660 ), 661 "INPUT": lambda self: self.expression(exp.InputModelProperty, this=self._parse_schema()), 662 "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs), 663 "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty), 664 "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"), 665 "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"), 666 "LIKE": lambda self: self._parse_create_like(), 667 "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty), 668 "LOCK": lambda self: self._parse_locking(), 669 "LOCKING": lambda self: self._parse_locking(), 670 "LOG": lambda self, **kwargs: self._parse_log(**kwargs), 671 "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty), 672 "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs), 673 "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True), 674 "NO": lambda self: self._parse_no_property(), 675 "ON": lambda self: self._parse_on_property(), 676 "ORDER BY": lambda self: self._parse_order(skip_order_token=True), 677 "OUTPUT": lambda self: self.expression(exp.OutputModelProperty, this=self._parse_schema()), 678 "PARTITION BY": lambda self: self._parse_partitioned_by(), 679 "PARTITIONED BY": lambda self: self._parse_partitioned_by(), 680 "PARTITIONED_BY": lambda self: self._parse_partitioned_by(), 681 "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True), 682 "RANGE": lambda self: self._parse_dict_range(this="RANGE"), 683 "REMOTE": lambda self: self._parse_remote_with_connection(), 684 "RETURNS": lambda self: self._parse_returns(), 685 "ROW": lambda self: self._parse_row(), 686 "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty), 687 "SAMPLE": lambda self: self.expression( 688 exp.SampleProperty, this=self._match_text_seq("BY") and self._parse_bitwise() 689 ), 690 "SET": lambda self: self.expression(exp.SetProperty, multi=False), 691 "SETTINGS": lambda self: self.expression( 692 exp.SettingsProperty, expressions=self._parse_csv(self._parse_set_item) 693 ), 694 "SORTKEY": lambda self: self._parse_sortkey(), 695 "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"), 696 "STABLE": lambda self: self.expression( 697 exp.StabilityProperty, this=exp.Literal.string("STABLE") 698 ), 699 "STORED": lambda self: self._parse_stored(), 700 "TBLPROPERTIES": lambda self: self._parse_wrapped_csv(self._parse_property), 701 "TEMP": lambda self: self.expression(exp.TemporaryProperty), 702 "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty), 703 "TO": lambda self: self._parse_to_table(), 704 "TRANSIENT": lambda self: self.expression(exp.TransientProperty), 705 "TRANSFORM": lambda self: self.expression( 706 exp.TransformModelProperty, expressions=self._parse_wrapped_csv(self._parse_expression) 707 ), 708 "TTL": lambda self: self._parse_ttl(), 709 "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 710 "VOLATILE": lambda self: 
self._parse_volatile_property(), 711 "WITH": lambda self: self._parse_with_property(), 712 } 713 714 CONSTRAINT_PARSERS = { 715 "AUTOINCREMENT": lambda self: self._parse_auto_increment(), 716 "AUTO_INCREMENT": lambda self: self._parse_auto_increment(), 717 "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False), 718 "CHARACTER SET": lambda self: self.expression( 719 exp.CharacterSetColumnConstraint, this=self._parse_var_or_string() 720 ), 721 "CHECK": lambda self: self.expression( 722 exp.CheckColumnConstraint, this=self._parse_wrapped(self._parse_conjunction) 723 ), 724 "COLLATE": lambda self: self.expression( 725 exp.CollateColumnConstraint, this=self._parse_var() 726 ), 727 "COMMENT": lambda self: self.expression( 728 exp.CommentColumnConstraint, this=self._parse_string() 729 ), 730 "COMPRESS": lambda self: self._parse_compress(), 731 "CLUSTERED": lambda self: self.expression( 732 exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 733 ), 734 "NONCLUSTERED": lambda self: self.expression( 735 exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 736 ), 737 "DEFAULT": lambda self: self.expression( 738 exp.DefaultColumnConstraint, this=self._parse_bitwise() 739 ), 740 "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()), 741 "FOREIGN KEY": lambda self: self._parse_foreign_key(), 742 "FORMAT": lambda self: self.expression( 743 exp.DateFormatColumnConstraint, this=self._parse_var_or_string() 744 ), 745 "GENERATED": lambda self: self._parse_generated_as_identity(), 746 "IDENTITY": lambda self: self._parse_auto_increment(), 747 "INLINE": lambda self: self._parse_inline(), 748 "LIKE": lambda self: self._parse_create_like(), 749 "NOT": lambda self: self._parse_not_constraint(), 750 "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True), 751 "ON": lambda self: ( 752 self._match(TokenType.UPDATE) 753 and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function()) 754 ) 755 or self.expression(exp.OnProperty, this=self._parse_id_var()), 756 "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()), 757 "PRIMARY KEY": lambda self: self._parse_primary_key(), 758 "REFERENCES": lambda self: self._parse_references(match=False), 759 "TITLE": lambda self: self.expression( 760 exp.TitleColumnConstraint, this=self._parse_var_or_string() 761 ), 762 "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]), 763 "UNIQUE": lambda self: self._parse_unique(), 764 "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint), 765 "WITH": lambda self: self.expression( 766 exp.Properties, expressions=self._parse_wrapped_csv(self._parse_property) 767 ), 768 } 769 770 ALTER_PARSERS = { 771 "ADD": lambda self: self._parse_alter_table_add(), 772 "ALTER": lambda self: self._parse_alter_table_alter(), 773 "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()), 774 "DROP": lambda self: self._parse_alter_table_drop(), 775 "RENAME": lambda self: self._parse_alter_table_rename(), 776 } 777 778 SCHEMA_UNNAMED_CONSTRAINTS = {"CHECK", "FOREIGN KEY", "LIKE", "PRIMARY KEY", "UNIQUE"} 779 780 NO_PAREN_FUNCTION_PARSERS = { 781 "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()), 782 "CASE": lambda self: self._parse_case(), 783 "IF": lambda self: self._parse_if(), 784 "NEXT": lambda self: self._parse_next_value_for(), 785 } 786 787 
INVALID_FUNC_NAME_TOKENS = { 788 TokenType.IDENTIFIER, 789 TokenType.STRING, 790 } 791 792 FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"} 793 794 FUNCTION_PARSERS = { 795 "ANY_VALUE": lambda self: self._parse_any_value(), 796 "CAST": lambda self: self._parse_cast(self.STRICT_CAST), 797 "CONCAT": lambda self: self._parse_concat(), 798 "CONCAT_WS": lambda self: self._parse_concat_ws(), 799 "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST), 800 "DECODE": lambda self: self._parse_decode(), 801 "EXTRACT": lambda self: self._parse_extract(), 802 "JSON_OBJECT": lambda self: self._parse_json_object(), 803 "JSON_TABLE": lambda self: self._parse_json_table(), 804 "LOG": lambda self: self._parse_logarithm(), 805 "MATCH": lambda self: self._parse_match_against(), 806 "OPENJSON": lambda self: self._parse_open_json(), 807 "POSITION": lambda self: self._parse_position(), 808 "PREDICT": lambda self: self._parse_predict(), 809 "SAFE_CAST": lambda self: self._parse_cast(False, safe=True), 810 "STRING_AGG": lambda self: self._parse_string_agg(), 811 "SUBSTRING": lambda self: self._parse_substring(), 812 "TRIM": lambda self: self._parse_trim(), 813 "TRY_CAST": lambda self: self._parse_cast(False, safe=True), 814 "TRY_CONVERT": lambda self: self._parse_convert(False, safe=True), 815 } 816 817 QUERY_MODIFIER_PARSERS = { 818 TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()), 819 TokenType.WHERE: lambda self: ("where", self._parse_where()), 820 TokenType.GROUP_BY: lambda self: ("group", self._parse_group()), 821 TokenType.HAVING: lambda self: ("having", self._parse_having()), 822 TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()), 823 TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()), 824 TokenType.ORDER_BY: lambda self: ("order", self._parse_order()), 825 TokenType.LIMIT: lambda self: ("limit", self._parse_limit()), 826 TokenType.FETCH: lambda self: ("limit", self._parse_limit()), 827 TokenType.OFFSET: lambda self: ("offset", self._parse_offset()), 828 TokenType.FOR: lambda self: ("locks", self._parse_locks()), 829 TokenType.LOCK: lambda self: ("locks", self._parse_locks()), 830 TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 831 TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 832 TokenType.CLUSTER_BY: lambda self: ( 833 "cluster", 834 self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 835 ), 836 TokenType.DISTRIBUTE_BY: lambda self: ( 837 "distribute", 838 self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY), 839 ), 840 TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)), 841 TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)), 842 TokenType.START_WITH: lambda self: ("connect", self._parse_connect()), 843 } 844 845 SET_PARSERS = { 846 "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"), 847 "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"), 848 "SESSION": lambda self: self._parse_set_item_assignment("SESSION"), 849 "TRANSACTION": lambda self: self._parse_set_transaction(), 850 } 851 852 SHOW_PARSERS: t.Dict[str, t.Callable] = {} 853 854 TYPE_LITERAL_PARSERS = { 855 exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this), 856 } 857 858 MODIFIABLES = (exp.Subquery, exp.Subqueryable, exp.Table) 859 860 DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN} 861 862 PRE_VOLATILE_TOKENS = 
{TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE} 863 864 TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"} 865 TRANSACTION_CHARACTERISTICS = { 866 "ISOLATION LEVEL REPEATABLE READ", 867 "ISOLATION LEVEL READ COMMITTED", 868 "ISOLATION LEVEL READ UNCOMMITTED", 869 "ISOLATION LEVEL SERIALIZABLE", 870 "READ WRITE", 871 "READ ONLY", 872 } 873 874 INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"} 875 876 CLONE_KEYWORDS = {"CLONE", "COPY"} 877 CLONE_KINDS = {"TIMESTAMP", "OFFSET", "STATEMENT"} 878 879 OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS"} 880 881 TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE} 882 883 WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS} 884 WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER} 885 WINDOW_SIDES = {"FOLLOWING", "PRECEDING"} 886 887 FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT} 888 889 ADD_CONSTRAINT_TOKENS = {TokenType.CONSTRAINT, TokenType.PRIMARY_KEY, TokenType.FOREIGN_KEY} 890 891 DISTINCT_TOKENS = {TokenType.DISTINCT} 892 893 NULL_TOKENS = {TokenType.NULL} 894 895 UNNEST_OFFSET_ALIAS_TOKENS = ID_VAR_TOKENS - SET_OPERATIONS 896 897 STRICT_CAST = True 898 899 # A NULL arg in CONCAT yields NULL by default 900 CONCAT_NULL_OUTPUTS_STRING = False 901 902 PREFIXED_PIVOT_COLUMNS = False 903 IDENTIFY_PIVOT_STRINGS = False 904 905 LOG_BASE_FIRST = True 906 LOG_DEFAULTS_TO_LN = False 907 908 # Whether or not ADD is present for each column added by ALTER TABLE 909 ALTER_TABLE_ADD_COLUMN_KEYWORD = True 910 911 # Whether or not the table sample clause expects CSV syntax 912 TABLESAMPLE_CSV = False 913 914 # Whether or not the SET command needs a delimiter (e.g. "=") for assignments 915 SET_REQUIRES_ASSIGNMENT_DELIMITER = True 916 917 # Whether the TRIM function expects the characters to trim as its first argument 918 TRIM_PATTERN_FIRST = False 919 920 __slots__ = ( 921 "error_level", 922 "error_message_context", 923 "max_errors", 924 "sql", 925 "errors", 926 "_tokens", 927 "_index", 928 "_curr", 929 "_next", 930 "_prev", 931 "_prev_comments", 932 "_tokenizer", 933 ) 934 935 # Autofilled 936 TOKENIZER_CLASS: t.Type[Tokenizer] = Tokenizer 937 INDEX_OFFSET: int = 0 938 UNNEST_COLUMN_ONLY: bool = False 939 ALIAS_POST_TABLESAMPLE: bool = False 940 STRICT_STRING_CONCAT = False 941 SUPPORTS_USER_DEFINED_TYPES = True 942 NORMALIZE_FUNCTIONS = "upper" 943 NULL_ORDERING: str = "nulls_are_small" 944 SHOW_TRIE: t.Dict = {} 945 SET_TRIE: t.Dict = {} 946 FORMAT_MAPPING: t.Dict[str, str] = {} 947 FORMAT_TRIE: t.Dict = {} 948 TIME_MAPPING: t.Dict[str, str] = {} 949 TIME_TRIE: t.Dict = {} 950 951 def __init__( 952 self, 953 error_level: t.Optional[ErrorLevel] = None, 954 error_message_context: int = 100, 955 max_errors: int = 3, 956 ): 957 self.error_level = error_level or ErrorLevel.IMMEDIATE 958 self.error_message_context = error_message_context 959 self.max_errors = max_errors 960 self._tokenizer = self.TOKENIZER_CLASS() 961 self.reset() 962 963 def reset(self): 964 self.sql = "" 965 self.errors = [] 966 self._tokens = [] 967 self._index = 0 968 self._curr = None 969 self._next = None 970 self._prev = None 971 self._prev_comments = None 972 973 def parse( 974 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 975 ) -> t.List[t.Optional[exp.Expression]]: 976 """ 977 Parses a list of tokens and returns a list of syntax trees, one tree 978 per parsed SQL statement. 979 980 Args: 981 raw_tokens: The list of tokens. 
982 sql: The original SQL string, used to produce helpful debug messages. 983 984 Returns: 985 The list of the produced syntax trees. 986 """ 987 return self._parse( 988 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 989 ) 990 991 def parse_into( 992 self, 993 expression_types: exp.IntoType, 994 raw_tokens: t.List[Token], 995 sql: t.Optional[str] = None, 996 ) -> t.List[t.Optional[exp.Expression]]: 997 """ 998 Parses a list of tokens into a given Expression type. If a collection of Expression 999 types is given instead, this method will try to parse the token list into each one 1000 of them, stopping at the first for which the parsing succeeds. 1001 1002 Args: 1003 expression_types: The expression type(s) to try and parse the token list into. 1004 raw_tokens: The list of tokens. 1005 sql: The original SQL string, used to produce helpful debug messages. 1006 1007 Returns: 1008 The target Expression. 1009 """ 1010 errors = [] 1011 for expression_type in ensure_list(expression_types): 1012 parser = self.EXPRESSION_PARSERS.get(expression_type) 1013 if not parser: 1014 raise TypeError(f"No parser registered for {expression_type}") 1015 1016 try: 1017 return self._parse(parser, raw_tokens, sql) 1018 except ParseError as e: 1019 e.errors[0]["into_expression"] = expression_type 1020 errors.append(e) 1021 1022 raise ParseError( 1023 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 1024 errors=merge_errors(errors), 1025 ) from errors[-1] 1026 1027 def _parse( 1028 self, 1029 parse_method: t.Callable[[Parser], t.Optional[exp.Expression]], 1030 raw_tokens: t.List[Token], 1031 sql: t.Optional[str] = None, 1032 ) -> t.List[t.Optional[exp.Expression]]: 1033 self.reset() 1034 self.sql = sql or "" 1035 1036 total = len(raw_tokens) 1037 chunks: t.List[t.List[Token]] = [[]] 1038 1039 for i, token in enumerate(raw_tokens): 1040 if token.token_type == TokenType.SEMICOLON: 1041 if i < total - 1: 1042 chunks.append([]) 1043 else: 1044 chunks[-1].append(token) 1045 1046 expressions = [] 1047 1048 for tokens in chunks: 1049 self._index = -1 1050 self._tokens = tokens 1051 self._advance() 1052 1053 expressions.append(parse_method(self)) 1054 1055 if self._index < len(self._tokens): 1056 self.raise_error("Invalid expression / Unexpected token") 1057 1058 self.check_errors() 1059 1060 return expressions 1061 1062 def check_errors(self) -> None: 1063 """Logs or raises any found errors, depending on the chosen error level setting.""" 1064 if self.error_level == ErrorLevel.WARN: 1065 for error in self.errors: 1066 logger.error(str(error)) 1067 elif self.error_level == ErrorLevel.RAISE and self.errors: 1068 raise ParseError( 1069 concat_messages(self.errors, self.max_errors), 1070 errors=merge_errors(self.errors), 1071 ) 1072 1073 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 1074 """ 1075 Appends an error in the list of recorded errors or raises it, depending on the chosen 1076 error level setting. 1077 """ 1078 token = token or self._curr or self._prev or Token.string("") 1079 start = token.start 1080 end = token.end + 1 1081 start_context = self.sql[max(start - self.error_message_context, 0) : start] 1082 highlight = self.sql[start:end] 1083 end_context = self.sql[end : end + self.error_message_context] 1084 1085 error = ParseError.new( 1086 f"{message}. 
Line {token.line}, Col: {token.col}.\n" 1087 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 1088 description=message, 1089 line=token.line, 1090 col=token.col, 1091 start_context=start_context, 1092 highlight=highlight, 1093 end_context=end_context, 1094 ) 1095 1096 if self.error_level == ErrorLevel.IMMEDIATE: 1097 raise error 1098 1099 self.errors.append(error) 1100 1101 def expression( 1102 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 1103 ) -> E: 1104 """ 1105 Creates a new, validated Expression. 1106 1107 Args: 1108 exp_class: The expression class to instantiate. 1109 comments: An optional list of comments to attach to the expression. 1110 kwargs: The arguments to set for the expression along with their respective values. 1111 1112 Returns: 1113 The target expression. 1114 """ 1115 instance = exp_class(**kwargs) 1116 instance.add_comments(comments) if comments else self._add_comments(instance) 1117 return self.validate_expression(instance) 1118 1119 def _add_comments(self, expression: t.Optional[exp.Expression]) -> None: 1120 if expression and self._prev_comments: 1121 expression.add_comments(self._prev_comments) 1122 self._prev_comments = None 1123 1124 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1125 """ 1126 Validates an Expression, making sure that all its mandatory arguments are set. 1127 1128 Args: 1129 expression: The expression to validate. 1130 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1131 1132 Returns: 1133 The validated expression. 1134 """ 1135 if self.error_level != ErrorLevel.IGNORE: 1136 for error_message in expression.error_messages(args): 1137 self.raise_error(error_message) 1138 1139 return expression 1140 1141 def _find_sql(self, start: Token, end: Token) -> str: 1142 return self.sql[start.start : end.end + 1] 1143 1144 def _advance(self, times: int = 1) -> None: 1145 self._index += times 1146 self._curr = seq_get(self._tokens, self._index) 1147 self._next = seq_get(self._tokens, self._index + 1) 1148 1149 if self._index > 0: 1150 self._prev = self._tokens[self._index - 1] 1151 self._prev_comments = self._prev.comments 1152 else: 1153 self._prev = None 1154 self._prev_comments = None 1155 1156 def _retreat(self, index: int) -> None: 1157 if index != self._index: 1158 self._advance(index - self._index) 1159 1160 def _parse_command(self) -> exp.Command: 1161 return self.expression(exp.Command, this=self._prev.text, expression=self._parse_string()) 1162 1163 def _parse_comment(self, allow_exists: bool = True) -> exp.Expression: 1164 start = self._prev 1165 exists = self._parse_exists() if allow_exists else None 1166 1167 self._match(TokenType.ON) 1168 1169 kind = self._match_set(self.CREATABLES) and self._prev 1170 if not kind: 1171 return self._parse_as_command(start) 1172 1173 if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1174 this = self._parse_user_defined_function(kind=kind.token_type) 1175 elif kind.token_type == TokenType.TABLE: 1176 this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS) 1177 elif kind.token_type == TokenType.COLUMN: 1178 this = self._parse_column() 1179 else: 1180 this = self._parse_id_var() 1181 1182 self._match(TokenType.IS) 1183 1184 return self.expression( 1185 exp.Comment, this=this, kind=kind.text, expression=self._parse_string(), exists=exists 1186 ) 1187 1188 def _parse_to_table( 1189 self, 1190 ) -> exp.ToTableProperty: 1191 table = 
self._parse_table_parts(schema=True) 1192 return self.expression(exp.ToTableProperty, this=table) 1193 1194 # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl 1195 def _parse_ttl(self) -> exp.Expression: 1196 def _parse_ttl_action() -> t.Optional[exp.Expression]: 1197 this = self._parse_bitwise() 1198 1199 if self._match_text_seq("DELETE"): 1200 return self.expression(exp.MergeTreeTTLAction, this=this, delete=True) 1201 if self._match_text_seq("RECOMPRESS"): 1202 return self.expression( 1203 exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise() 1204 ) 1205 if self._match_text_seq("TO", "DISK"): 1206 return self.expression( 1207 exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string() 1208 ) 1209 if self._match_text_seq("TO", "VOLUME"): 1210 return self.expression( 1211 exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string() 1212 ) 1213 1214 return this 1215 1216 expressions = self._parse_csv(_parse_ttl_action) 1217 where = self._parse_where() 1218 group = self._parse_group() 1219 1220 aggregates = None 1221 if group and self._match(TokenType.SET): 1222 aggregates = self._parse_csv(self._parse_set_item) 1223 1224 return self.expression( 1225 exp.MergeTreeTTL, 1226 expressions=expressions, 1227 where=where, 1228 group=group, 1229 aggregates=aggregates, 1230 ) 1231 1232 def _parse_statement(self) -> t.Optional[exp.Expression]: 1233 if self._curr is None: 1234 return None 1235 1236 if self._match_set(self.STATEMENT_PARSERS): 1237 return self.STATEMENT_PARSERS[self._prev.token_type](self) 1238 1239 if self._match_set(Tokenizer.COMMANDS): 1240 return self._parse_command() 1241 1242 expression = self._parse_expression() 1243 expression = self._parse_set_operations(expression) if expression else self._parse_select() 1244 return self._parse_query_modifiers(expression) 1245 1246 def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command: 1247 start = self._prev 1248 temporary = self._match(TokenType.TEMPORARY) 1249 materialized = self._match_text_seq("MATERIALIZED") 1250 1251 kind = self._match_set(self.CREATABLES) and self._prev.text 1252 if not kind: 1253 return self._parse_as_command(start) 1254 1255 return self.expression( 1256 exp.Drop, 1257 comments=start.comments, 1258 exists=exists or self._parse_exists(), 1259 this=self._parse_table(schema=True), 1260 kind=kind, 1261 temporary=temporary, 1262 materialized=materialized, 1263 cascade=self._match_text_seq("CASCADE"), 1264 constraints=self._match_text_seq("CONSTRAINTS"), 1265 purge=self._match_text_seq("PURGE"), 1266 ) 1267 1268 def _parse_exists(self, not_: bool = False) -> t.Optional[bool]: 1269 return ( 1270 self._match_text_seq("IF") 1271 and (not not_ or self._match(TokenType.NOT)) 1272 and self._match(TokenType.EXISTS) 1273 ) 1274 1275 def _parse_create(self) -> exp.Create | exp.Command: 1276 # Note: this can't be None because we've matched a statement parser 1277 start = self._prev 1278 comments = self._prev_comments 1279 1280 replace = start.text.upper() == "REPLACE" or self._match_pair( 1281 TokenType.OR, TokenType.REPLACE 1282 ) 1283 unique = self._match(TokenType.UNIQUE) 1284 1285 if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False): 1286 self._advance() 1287 1288 properties = None 1289 create_token = self._match_set(self.CREATABLES) and self._prev 1290 1291 if not create_token: 1292 # exp.Properties.Location.POST_CREATE 1293 properties = self._parse_properties() 1294 create_token = self._match_set(self.CREATABLES) and 
self._prev 1295 1296 if not properties or not create_token: 1297 return self._parse_as_command(start) 1298 1299 exists = self._parse_exists(not_=True) 1300 this = None 1301 expression: t.Optional[exp.Expression] = None 1302 indexes = None 1303 no_schema_binding = None 1304 begin = None 1305 end = None 1306 clone = None 1307 1308 def extend_props(temp_props: t.Optional[exp.Properties]) -> None: 1309 nonlocal properties 1310 if properties and temp_props: 1311 properties.expressions.extend(temp_props.expressions) 1312 elif temp_props: 1313 properties = temp_props 1314 1315 if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1316 this = self._parse_user_defined_function(kind=create_token.token_type) 1317 1318 # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature) 1319 extend_props(self._parse_properties()) 1320 1321 self._match(TokenType.ALIAS) 1322 1323 if self._match(TokenType.COMMAND): 1324 expression = self._parse_as_command(self._prev) 1325 else: 1326 begin = self._match(TokenType.BEGIN) 1327 return_ = self._match_text_seq("RETURN") 1328 1329 if self._match(TokenType.STRING, advance=False): 1330 # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property 1331 # # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement 1332 expression = self._parse_string() 1333 extend_props(self._parse_properties()) 1334 else: 1335 expression = self._parse_statement() 1336 1337 end = self._match_text_seq("END") 1338 1339 if return_: 1340 expression = self.expression(exp.Return, this=expression) 1341 elif create_token.token_type == TokenType.INDEX: 1342 this = self._parse_index(index=self._parse_id_var()) 1343 elif create_token.token_type in self.DB_CREATABLES: 1344 table_parts = self._parse_table_parts(schema=True) 1345 1346 # exp.Properties.Location.POST_NAME 1347 self._match(TokenType.COMMA) 1348 extend_props(self._parse_properties(before=True)) 1349 1350 this = self._parse_schema(this=table_parts) 1351 1352 # exp.Properties.Location.POST_SCHEMA and POST_WITH 1353 extend_props(self._parse_properties()) 1354 1355 self._match(TokenType.ALIAS) 1356 if not self._match_set(self.DDL_SELECT_TOKENS, advance=False): 1357 # exp.Properties.Location.POST_ALIAS 1358 extend_props(self._parse_properties()) 1359 1360 expression = self._parse_ddl_select() 1361 1362 if create_token.token_type == TokenType.TABLE: 1363 # exp.Properties.Location.POST_EXPRESSION 1364 extend_props(self._parse_properties()) 1365 1366 indexes = [] 1367 while True: 1368 index = self._parse_index() 1369 1370 # exp.Properties.Location.POST_INDEX 1371 extend_props(self._parse_properties()) 1372 1373 if not index: 1374 break 1375 else: 1376 self._match(TokenType.COMMA) 1377 indexes.append(index) 1378 elif create_token.token_type == TokenType.VIEW: 1379 if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"): 1380 no_schema_binding = True 1381 1382 shallow = self._match_text_seq("SHALLOW") 1383 1384 if self._match_texts(self.CLONE_KEYWORDS): 1385 copy = self._prev.text.lower() == "copy" 1386 clone = self._parse_table(schema=True) 1387 when = self._match_texts({"AT", "BEFORE"}) and self._prev.text.upper() 1388 clone_kind = ( 1389 self._match(TokenType.L_PAREN) 1390 and self._match_texts(self.CLONE_KINDS) 1391 and self._prev.text.upper() 1392 ) 1393 clone_expression = self._match(TokenType.FARROW) and self._parse_bitwise() 1394 self._match(TokenType.R_PAREN) 1395 clone = self.expression( 1396 exp.Clone, 1397 this=clone, 
1398 when=when, 1399 kind=clone_kind, 1400 shallow=shallow, 1401 expression=clone_expression, 1402 copy=copy, 1403 ) 1404 1405 return self.expression( 1406 exp.Create, 1407 comments=comments, 1408 this=this, 1409 kind=create_token.text, 1410 replace=replace, 1411 unique=unique, 1412 expression=expression, 1413 exists=exists, 1414 properties=properties, 1415 indexes=indexes, 1416 no_schema_binding=no_schema_binding, 1417 begin=begin, 1418 end=end, 1419 clone=clone, 1420 ) 1421 1422 def _parse_property_before(self) -> t.Optional[exp.Expression]: 1423 # only used for teradata currently 1424 self._match(TokenType.COMMA) 1425 1426 kwargs = { 1427 "no": self._match_text_seq("NO"), 1428 "dual": self._match_text_seq("DUAL"), 1429 "before": self._match_text_seq("BEFORE"), 1430 "default": self._match_text_seq("DEFAULT"), 1431 "local": (self._match_text_seq("LOCAL") and "LOCAL") 1432 or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"), 1433 "after": self._match_text_seq("AFTER"), 1434 "minimum": self._match_texts(("MIN", "MINIMUM")), 1435 "maximum": self._match_texts(("MAX", "MAXIMUM")), 1436 } 1437 1438 if self._match_texts(self.PROPERTY_PARSERS): 1439 parser = self.PROPERTY_PARSERS[self._prev.text.upper()] 1440 try: 1441 return parser(self, **{k: v for k, v in kwargs.items() if v}) 1442 except TypeError: 1443 self.raise_error(f"Cannot parse property '{self._prev.text}'") 1444 1445 return None 1446 1447 def _parse_property(self) -> t.Optional[exp.Expression]: 1448 if self._match_texts(self.PROPERTY_PARSERS): 1449 return self.PROPERTY_PARSERS[self._prev.text.upper()](self) 1450 1451 if self._match(TokenType.DEFAULT) and self._match_texts(self.PROPERTY_PARSERS): 1452 return self.PROPERTY_PARSERS[self._prev.text.upper()](self, default=True) 1453 1454 if self._match_text_seq("COMPOUND", "SORTKEY"): 1455 return self._parse_sortkey(compound=True) 1456 1457 if self._match_text_seq("SQL", "SECURITY"): 1458 return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER")) 1459 1460 index = self._index 1461 key = self._parse_column() 1462 1463 if not self._match(TokenType.EQ): 1464 self._retreat(index) 1465 return None 1466 1467 return self.expression( 1468 exp.Property, 1469 this=key.to_dot() if isinstance(key, exp.Column) else key, 1470 value=self._parse_column() or self._parse_var(any_token=True), 1471 ) 1472 1473 def _parse_stored(self) -> exp.FileFormatProperty: 1474 self._match(TokenType.ALIAS) 1475 1476 input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None 1477 output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None 1478 1479 return self.expression( 1480 exp.FileFormatProperty, 1481 this=self.expression( 1482 exp.InputOutputFormat, input_format=input_format, output_format=output_format 1483 ) 1484 if input_format or output_format 1485 else self._parse_var_or_string() or self._parse_number() or self._parse_id_var(), 1486 ) 1487 1488 def _parse_property_assignment(self, exp_class: t.Type[E], **kwargs: t.Any) -> E: 1489 self._match(TokenType.EQ) 1490 self._match(TokenType.ALIAS) 1491 return self.expression(exp_class, this=self._parse_field(), **kwargs) 1492 1493 def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]: 1494 properties = [] 1495 while True: 1496 if before: 1497 prop = self._parse_property_before() 1498 else: 1499 prop = self._parse_property() 1500 1501 if not prop: 1502 break 1503 for p in ensure_list(prop): 1504 properties.append(p) 1505 1506 if properties: 1507 
return self.expression(exp.Properties, expressions=properties) 1508 1509 return None 1510 1511 def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty: 1512 return self.expression( 1513 exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION") 1514 ) 1515 1516 def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty: 1517 if self._index >= 2: 1518 pre_volatile_token = self._tokens[self._index - 2] 1519 else: 1520 pre_volatile_token = None 1521 1522 if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS: 1523 return exp.VolatileProperty() 1524 1525 return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE")) 1526 1527 def _parse_with_property( 1528 self, 1529 ) -> t.Optional[exp.Expression] | t.List[exp.Expression]: 1530 if self._match(TokenType.L_PAREN, advance=False): 1531 return self._parse_wrapped_csv(self._parse_property) 1532 1533 if self._match_text_seq("JOURNAL"): 1534 return self._parse_withjournaltable() 1535 1536 if self._match_text_seq("DATA"): 1537 return self._parse_withdata(no=False) 1538 elif self._match_text_seq("NO", "DATA"): 1539 return self._parse_withdata(no=True) 1540 1541 if not self._next: 1542 return None 1543 1544 return self._parse_withisolatedloading() 1545 1546 # https://dev.mysql.com/doc/refman/8.0/en/create-view.html 1547 def _parse_definer(self) -> t.Optional[exp.DefinerProperty]: 1548 self._match(TokenType.EQ) 1549 1550 user = self._parse_id_var() 1551 self._match(TokenType.PARAMETER) 1552 host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text) 1553 1554 if not user or not host: 1555 return None 1556 1557 return exp.DefinerProperty(this=f"{user}@{host}") 1558 1559 def _parse_withjournaltable(self) -> exp.WithJournalTableProperty: 1560 self._match(TokenType.TABLE) 1561 self._match(TokenType.EQ) 1562 return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts()) 1563 1564 def _parse_log(self, no: bool = False) -> exp.LogProperty: 1565 return self.expression(exp.LogProperty, no=no) 1566 1567 def _parse_journal(self, **kwargs) -> exp.JournalProperty: 1568 return self.expression(exp.JournalProperty, **kwargs) 1569 1570 def _parse_checksum(self) -> exp.ChecksumProperty: 1571 self._match(TokenType.EQ) 1572 1573 on = None 1574 if self._match(TokenType.ON): 1575 on = True 1576 elif self._match_text_seq("OFF"): 1577 on = False 1578 1579 return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT)) 1580 1581 def _parse_cluster(self) -> exp.Cluster: 1582 return self.expression(exp.Cluster, expressions=self._parse_csv(self._parse_ordered)) 1583 1584 def _parse_clustered_by(self) -> exp.ClusteredByProperty: 1585 self._match_text_seq("BY") 1586 1587 self._match_l_paren() 1588 expressions = self._parse_csv(self._parse_column) 1589 self._match_r_paren() 1590 1591 if self._match_text_seq("SORTED", "BY"): 1592 self._match_l_paren() 1593 sorted_by = self._parse_csv(self._parse_ordered) 1594 self._match_r_paren() 1595 else: 1596 sorted_by = None 1597 1598 self._match(TokenType.INTO) 1599 buckets = self._parse_number() 1600 self._match_text_seq("BUCKETS") 1601 1602 return self.expression( 1603 exp.ClusteredByProperty, 1604 expressions=expressions, 1605 sorted_by=sorted_by, 1606 buckets=buckets, 1607 ) 1608 1609 def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]: 1610 if not self._match_text_seq("GRANTS"): 1611 self._retreat(self._index - 1) 1612 return None 1613 1614 return 
self.expression(exp.CopyGrantsProperty) 1615 1616 def _parse_freespace(self) -> exp.FreespaceProperty: 1617 self._match(TokenType.EQ) 1618 return self.expression( 1619 exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT) 1620 ) 1621 1622 def _parse_mergeblockratio( 1623 self, no: bool = False, default: bool = False 1624 ) -> exp.MergeBlockRatioProperty: 1625 if self._match(TokenType.EQ): 1626 return self.expression( 1627 exp.MergeBlockRatioProperty, 1628 this=self._parse_number(), 1629 percent=self._match(TokenType.PERCENT), 1630 ) 1631 1632 return self.expression(exp.MergeBlockRatioProperty, no=no, default=default) 1633 1634 def _parse_datablocksize( 1635 self, 1636 default: t.Optional[bool] = None, 1637 minimum: t.Optional[bool] = None, 1638 maximum: t.Optional[bool] = None, 1639 ) -> exp.DataBlocksizeProperty: 1640 self._match(TokenType.EQ) 1641 size = self._parse_number() 1642 1643 units = None 1644 if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")): 1645 units = self._prev.text 1646 1647 return self.expression( 1648 exp.DataBlocksizeProperty, 1649 size=size, 1650 units=units, 1651 default=default, 1652 minimum=minimum, 1653 maximum=maximum, 1654 ) 1655 1656 def _parse_blockcompression(self) -> exp.BlockCompressionProperty: 1657 self._match(TokenType.EQ) 1658 always = self._match_text_seq("ALWAYS") 1659 manual = self._match_text_seq("MANUAL") 1660 never = self._match_text_seq("NEVER") 1661 default = self._match_text_seq("DEFAULT") 1662 1663 autotemp = None 1664 if self._match_text_seq("AUTOTEMP"): 1665 autotemp = self._parse_schema() 1666 1667 return self.expression( 1668 exp.BlockCompressionProperty, 1669 always=always, 1670 manual=manual, 1671 never=never, 1672 default=default, 1673 autotemp=autotemp, 1674 ) 1675 1676 def _parse_withisolatedloading(self) -> exp.IsolatedLoadingProperty: 1677 no = self._match_text_seq("NO") 1678 concurrent = self._match_text_seq("CONCURRENT") 1679 self._match_text_seq("ISOLATED", "LOADING") 1680 for_all = self._match_text_seq("FOR", "ALL") 1681 for_insert = self._match_text_seq("FOR", "INSERT") 1682 for_none = self._match_text_seq("FOR", "NONE") 1683 return self.expression( 1684 exp.IsolatedLoadingProperty, 1685 no=no, 1686 concurrent=concurrent, 1687 for_all=for_all, 1688 for_insert=for_insert, 1689 for_none=for_none, 1690 ) 1691 1692 def _parse_locking(self) -> exp.LockingProperty: 1693 if self._match(TokenType.TABLE): 1694 kind = "TABLE" 1695 elif self._match(TokenType.VIEW): 1696 kind = "VIEW" 1697 elif self._match(TokenType.ROW): 1698 kind = "ROW" 1699 elif self._match_text_seq("DATABASE"): 1700 kind = "DATABASE" 1701 else: 1702 kind = None 1703 1704 if kind in ("DATABASE", "TABLE", "VIEW"): 1705 this = self._parse_table_parts() 1706 else: 1707 this = None 1708 1709 if self._match(TokenType.FOR): 1710 for_or_in = "FOR" 1711 elif self._match(TokenType.IN): 1712 for_or_in = "IN" 1713 else: 1714 for_or_in = None 1715 1716 if self._match_text_seq("ACCESS"): 1717 lock_type = "ACCESS" 1718 elif self._match_texts(("EXCL", "EXCLUSIVE")): 1719 lock_type = "EXCLUSIVE" 1720 elif self._match_text_seq("SHARE"): 1721 lock_type = "SHARE" 1722 elif self._match_text_seq("READ"): 1723 lock_type = "READ" 1724 elif self._match_text_seq("WRITE"): 1725 lock_type = "WRITE" 1726 elif self._match_text_seq("CHECKSUM"): 1727 lock_type = "CHECKSUM" 1728 else: 1729 lock_type = None 1730 1731 override = self._match_text_seq("OVERRIDE") 1732 1733 return self.expression( 1734 exp.LockingProperty, 1735 this=this, 1736 kind=kind, 1737 

    def _parse_partition_by(self) -> t.List[exp.Expression]:
        if self._match(TokenType.PARTITION_BY):
            return self._parse_csv(self._parse_conjunction)
        return []

    def _parse_partitioned_by(self) -> exp.PartitionedByProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.PartitionedByProperty,
            this=self._parse_schema() or self._parse_bracket(self._parse_field()),
        )

    def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty:
        if self._match_text_seq("AND", "STATISTICS"):
            statistics = True
        elif self._match_text_seq("AND", "NO", "STATISTICS"):
            statistics = False
        else:
            statistics = None

        return self.expression(exp.WithDataProperty, no=no, statistics=statistics)

    def _parse_no_property(self) -> t.Optional[exp.NoPrimaryIndexProperty]:
        if self._match_text_seq("PRIMARY", "INDEX"):
            return exp.NoPrimaryIndexProperty()
        return None

    def _parse_on_property(self) -> t.Optional[exp.Expression]:
        if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"):
            return exp.OnCommitProperty()
        if self._match_text_seq("COMMIT", "DELETE", "ROWS"):
            return exp.OnCommitProperty(delete=True)
        return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var()))

    def _parse_distkey(self) -> exp.DistKeyProperty:
        return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var))

    def _parse_create_like(self) -> t.Optional[exp.LikeProperty]:
        table = self._parse_table(schema=True)

        options = []
        while self._match_texts(("INCLUDING", "EXCLUDING")):
            this = self._prev.text.upper()

            id_var = self._parse_id_var()
            if not id_var:
                return None

            options.append(
                self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper()))
            )

        return self.expression(exp.LikeProperty, this=table, expressions=options)

    def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty:
        return self.expression(
            exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound
        )

    def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default
        )

    def _parse_remote_with_connection(self) -> exp.RemoteWithConnectionModelProperty:
        self._match_text_seq("WITH", "CONNECTION")
        return self.expression(
            exp.RemoteWithConnectionModelProperty, this=self._parse_table_parts()
        )

    def _parse_returns(self) -> exp.ReturnsProperty:
        value: t.Optional[exp.Expression]
        is_table = self._match(TokenType.TABLE)

        if is_table:
            if self._match(TokenType.LT):
                value = self.expression(
                    exp.Schema,
                    this="TABLE",
                    expressions=self._parse_csv(self._parse_struct_types),
                )
                if not self._match(TokenType.GT):
                    self.raise_error("Expecting >")
            else:
                value = self._parse_schema(exp.var("TABLE"))
        else:
            value = self._parse_types()

        return self.expression(exp.ReturnsProperty, this=value, is_table=is_table)

    def _parse_describe(self) -> exp.Describe:
        kind = self._match_set(self.CREATABLES) and self._prev.text
        this = self._parse_table(schema=True)
        properties = self._parse_properties()
        expressions = properties.expressions if properties else None
        return self.expression(exp.Describe, this=this, kind=kind, expressions=expressions)
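
A quick sketch of _parse_describe in action (illustrative; the statement below is an assumed input). The optional CREATABLES keyword becomes the "kind" arg:

    import sqlglot
    from sqlglot import exp

    desc = sqlglot.parse_one("DESCRIBE TABLE db.t")
    print(isinstance(desc, exp.Describe), desc.args.get("kind"))  # True TABLE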

    def _parse_insert(self) -> exp.Insert:
        comments = ensure_list(self._prev_comments)
        overwrite = self._match(TokenType.OVERWRITE)
        ignore = self._match(TokenType.IGNORE)
        local = self._match_text_seq("LOCAL")
        alternative = None

        if self._match_text_seq("DIRECTORY"):
            this: t.Optional[exp.Expression] = self.expression(
                exp.Directory,
                this=self._parse_var_or_string(),
                local=local,
                row_format=self._parse_row_format(match_row=True),
            )
        else:
            if self._match(TokenType.OR):
                alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text

            self._match(TokenType.INTO)
            comments += ensure_list(self._prev_comments)
            self._match(TokenType.TABLE)
            this = self._parse_table(schema=True)

        returning = self._parse_returning()

        return self.expression(
            exp.Insert,
            comments=comments,
            this=this,
            by_name=self._match_text_seq("BY", "NAME"),
            exists=self._parse_exists(),
            partition=self._parse_partition(),
            where=self._match_pair(TokenType.REPLACE, TokenType.WHERE)
            and self._parse_conjunction(),
            expression=self._parse_ddl_select(),
            conflict=self._parse_on_conflict(),
            returning=returning or self._parse_returning(),
            overwrite=overwrite,
            alternative=alternative,
            ignore=ignore,
        )

    def _parse_kill(self) -> exp.Kill:
        kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None

        return self.expression(
            exp.Kill,
            this=self._parse_primary(),
            kind=kind,
        )

    def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]:
        conflict = self._match_text_seq("ON", "CONFLICT")
        duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY")

        if not conflict and not duplicate:
            return None

        nothing = None
        expressions = None
        key = None
        constraint = None

        if conflict:
            if self._match_text_seq("ON", "CONSTRAINT"):
                constraint = self._parse_id_var()
            else:
                key = self._parse_csv(self._parse_value)

        self._match_text_seq("DO")
        if self._match_text_seq("NOTHING"):
            nothing = True
        else:
            self._match(TokenType.UPDATE)
            self._match(TokenType.SET)
            expressions = self._parse_csv(self._parse_equality)

        return self.expression(
            exp.OnConflict,
            duplicate=duplicate,
            expressions=expressions,
            nothing=nothing,
            key=key,
            constraint=constraint,
        )

    def _parse_returning(self) -> t.Optional[exp.Returning]:
        if not self._match(TokenType.RETURNING):
            return None
        return self.expression(
            exp.Returning,
            expressions=self._parse_csv(self._parse_expression),
            into=self._match(TokenType.INTO) and self._parse_table_part(),
        )
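
Sketch (illustrative input, not from the source): _parse_on_conflict hangs off _parse_insert, so a PostgreSQL upsert ends up in the Insert node's "conflict" arg:

    import sqlglot
    from sqlglot import exp

    ins = sqlglot.parse_one(
        "INSERT INTO t (x) VALUES (1) ON CONFLICT (x) DO NOTHING", read="postgres"
    )
    conflict = ins.args.get("conflict")
    print(isinstance(conflict, exp.OnConflict), conflict.args.get("nothing"))  # True True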

    def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]:
        if not self._match(TokenType.FORMAT):
            return None
        return self._parse_row_format()

    def _parse_row_format(
        self, match_row: bool = False
    ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]:
        if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT):
            return None

        if self._match_text_seq("SERDE"):
            this = self._parse_string()

            serde_properties = None
            if self._match(TokenType.SERDE_PROPERTIES):
                serde_properties = self.expression(
                    exp.SerdeProperties, expressions=self._parse_wrapped_csv(self._parse_property)
                )

            return self.expression(
                exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties
            )

        self._match_text_seq("DELIMITED")

        kwargs = {}

        if self._match_text_seq("FIELDS", "TERMINATED", "BY"):
            kwargs["fields"] = self._parse_string()
        if self._match_text_seq("ESCAPED", "BY"):
            kwargs["escaped"] = self._parse_string()
        if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"):
            kwargs["collection_items"] = self._parse_string()
        if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"):
            kwargs["map_keys"] = self._parse_string()
        if self._match_text_seq("LINES", "TERMINATED", "BY"):
            kwargs["lines"] = self._parse_string()
        if self._match_text_seq("NULL", "DEFINED", "AS"):
            kwargs["null"] = self._parse_string()

        return self.expression(exp.RowFormatDelimitedProperty, **kwargs)  # type: ignore

    def _parse_load(self) -> exp.LoadData | exp.Command:
        if self._match_text_seq("DATA"):
            local = self._match_text_seq("LOCAL")
            self._match_text_seq("INPATH")
            inpath = self._parse_string()
            overwrite = self._match(TokenType.OVERWRITE)
            self._match_pair(TokenType.INTO, TokenType.TABLE)

            return self.expression(
                exp.LoadData,
                this=self._parse_table(schema=True),
                local=local,
                overwrite=overwrite,
                inpath=inpath,
                partition=self._parse_partition(),
                input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(),
                serde=self._match_text_seq("SERDE") and self._parse_string(),
            )
        return self._parse_as_command(self._prev)

    def _parse_delete(self) -> exp.Delete:
        # This handles MySQL's "Multiple-Table Syntax"
        # https://dev.mysql.com/doc/refman/8.0/en/delete.html
        tables = None
        comments = self._prev_comments
        if not self._match(TokenType.FROM, advance=False):
            tables = self._parse_csv(self._parse_table) or None

        returning = self._parse_returning()

        return self.expression(
            exp.Delete,
            comments=comments,
            tables=tables,
            this=self._match(TokenType.FROM) and self._parse_table(joins=True),
            using=self._match(TokenType.USING) and self._parse_table(joins=True),
            where=self._parse_where(),
            returning=returning or self._parse_returning(),
            limit=self._parse_limit(),
        )

    def _parse_update(self) -> exp.Update:
        comments = self._prev_comments
        this = self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS)
        expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality)
        returning = self._parse_returning()
        return self.expression(
            exp.Update,
            comments=comments,
            **{  # type: ignore
                "this": this,
                "expressions": expressions,
                "from": self._parse_from(joins=True),
                "where": self._parse_where(),
                "returning": returning or self._parse_returning(),
                "order": self._parse_order(),
                "limit": self._parse_limit(),
            },
        )

    def _parse_uncache(self) -> exp.Uncache:
        if not self._match(TokenType.TABLE):
            self.raise_error("Expecting TABLE after UNCACHE")

        return self.expression(
            exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True)
        )
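
For example (assumed input), a PostgreSQL DELETE with a RETURNING clause should round-trip through _parse_delete unchanged:

    import sqlglot

    stmt = sqlglot.parse_one("DELETE FROM t WHERE x = 1 RETURNING x", read="postgres")
    print(stmt.sql(dialect="postgres"))  # DELETE FROM t WHERE x = 1 RETURNING x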

    def _parse_cache(self) -> exp.Cache:
        lazy = self._match_text_seq("LAZY")
        self._match(TokenType.TABLE)
        table = self._parse_table(schema=True)

        options = []
        if self._match_text_seq("OPTIONS"):
            self._match_l_paren()
            k = self._parse_string()
            self._match(TokenType.EQ)
            v = self._parse_string()
            options = [k, v]
            self._match_r_paren()

        self._match(TokenType.ALIAS)
        return self.expression(
            exp.Cache,
            this=table,
            lazy=lazy,
            options=options,
            expression=self._parse_select(nested=True),
        )

    def _parse_partition(self) -> t.Optional[exp.Partition]:
        if not self._match(TokenType.PARTITION):
            return None

        return self.expression(
            exp.Partition, expressions=self._parse_wrapped_csv(self._parse_conjunction)
        )

    def _parse_value(self) -> exp.Tuple:
        if self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(self._parse_conjunction)
            self._match_r_paren()
            return self.expression(exp.Tuple, expressions=expressions)

        # In presto we can have VALUES 1, 2 which results in 1 column & 2 rows.
        # https://prestodb.io/docs/current/sql/values.html
        return self.expression(exp.Tuple, expressions=[self._parse_conjunction()])

    def _parse_projections(self) -> t.List[exp.Expression]:
        return self._parse_expressions()

    def _parse_select(
        self, nested: bool = False, table: bool = False, parse_subquery_alias: bool = True
    ) -> t.Optional[exp.Expression]:
        cte = self._parse_with()

        if cte:
            this = self._parse_statement()

            if not this:
                self.raise_error("Failed to parse any statement following CTE")
                return cte

            if "with" in this.arg_types:
                this.set("with", cte)
            else:
                self.raise_error(f"{this.key} does not support CTE")
                this = cte

            return this

        # duckdb supports leading with FROM x
        from_ = self._parse_from() if self._match(TokenType.FROM, advance=False) else None

        if self._match(TokenType.SELECT):
            comments = self._prev_comments

            hint = self._parse_hint()
            all_ = self._match(TokenType.ALL)
            distinct = self._match_set(self.DISTINCT_TOKENS)

            kind = (
                self._match(TokenType.ALIAS)
                and self._match_texts(("STRUCT", "VALUE"))
                and self._prev.text
            )

            if distinct:
                distinct = self.expression(
                    exp.Distinct,
                    on=self._parse_value() if self._match(TokenType.ON) else None,
                )

            if all_ and distinct:
                self.raise_error("Cannot specify both ALL and DISTINCT after SELECT")

            limit = self._parse_limit(top=True)
            projections = self._parse_projections()

            this = self.expression(
                exp.Select,
                kind=kind,
                hint=hint,
                distinct=distinct,
                expressions=projections,
                limit=limit,
            )
            this.comments = comments

            into = self._parse_into()
            if into:
                this.set("into", into)

            if not from_:
                from_ = self._parse_from()

            if from_:
                this.set("from", from_)

            this = self._parse_query_modifiers(this)
        elif (table or nested) and self._match(TokenType.L_PAREN):
            if self._match(TokenType.PIVOT):
                this = self._parse_simplified_pivot()
            elif self._match(TokenType.FROM):
                this = exp.select("*").from_(
                    t.cast(exp.From, self._parse_from(skip_from_token=True))
                )
            else:
                this = self._parse_table() if table else self._parse_select(nested=True)
                this = self._parse_set_operations(self._parse_query_modifiers(this))

            self._match_r_paren()

            # We return early here so that the UNION isn't attached to the subquery by the
            # following call to _parse_set_operations, but instead becomes the parent node
            return self._parse_subquery(this, parse_alias=parse_subquery_alias)
        elif self._match(TokenType.VALUES):
            this = self.expression(
                exp.Values,
                expressions=self._parse_csv(self._parse_value),
                alias=self._parse_table_alias(),
            )
        elif from_:
            this = exp.select("*").from_(from_.this, copy=False)
        else:
            this = None

        return self._parse_set_operations(this)

    def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]:
        if not skip_with_token and not self._match(TokenType.WITH):
            return None

        comments = self._prev_comments
        recursive = self._match(TokenType.RECURSIVE)

        expressions = []
        while True:
            expressions.append(self._parse_cte())

            if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH):
                break
            else:
                self._match(TokenType.WITH)

        return self.expression(
            exp.With, comments=comments, expressions=expressions, recursive=recursive
        )

    def _parse_cte(self) -> exp.CTE:
        alias = self._parse_table_alias()
        if not alias or not alias.this:
            self.raise_error("Expected CTE to have alias")

        self._match(TokenType.ALIAS)
        return self.expression(
            exp.CTE, this=self._parse_wrapped(self._parse_statement), alias=alias
        )

    def _parse_table_alias(
        self, alias_tokens: t.Optional[t.Collection[TokenType]] = None
    ) -> t.Optional[exp.TableAlias]:
        any_token = self._match(TokenType.ALIAS)
        alias = (
            self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
            or self._parse_string_as_identifier()
        )

        index = self._index
        if self._match(TokenType.L_PAREN):
            columns = self._parse_csv(self._parse_function_parameter)
            self._match_r_paren() if columns else self._retreat(index)
        else:
            columns = None

        if not alias and not columns:
            return None

        return self.expression(exp.TableAlias, this=alias, columns=columns)

    def _parse_subquery(
        self, this: t.Optional[exp.Expression], parse_alias: bool = True
    ) -> t.Optional[exp.Subquery]:
        if not this:
            return None

        return self.expression(
            exp.Subquery,
            this=this,
            pivots=self._parse_pivots(),
            alias=self._parse_table_alias() if parse_alias else None,
        )

    def _parse_query_modifiers(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        if isinstance(this, self.MODIFIABLES):
            for join in iter(self._parse_join, None):
                this.append("joins", join)
            for lateral in iter(self._parse_lateral, None):
                this.append("laterals", lateral)

            while True:
                if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False):
                    parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type]
                    key, expression = parser(self)

                    if expression:
                        this.set(key, expression)
                        if key == "limit":
                            offset = expression.args.pop("offset", None)
                            if offset:
                                this.set("offset", exp.Offset(expression=offset))
                        continue
                break
        return this
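
Putting _parse_with and _parse_query_modifiers together (illustrative input): the CTE lands in the Select node's "with" arg and each trailing clause in its own modifier arg:

    import sqlglot

    q = sqlglot.parse_one("WITH c AS (SELECT 1 AS x) SELECT x FROM c ORDER BY x LIMIT 5")
    print([k for k in ("with", "order", "limit") if q.args.get(k)])  # ['with', 'order', 'limit']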

    def _parse_hint(self) -> t.Optional[exp.Hint]:
        if self._match(TokenType.HINT):
            hints = []
            for hint in iter(lambda: self._parse_csv(self._parse_function), []):
                hints.extend(hint)

            if not self._match_pair(TokenType.STAR, TokenType.SLASH):
                self.raise_error("Expected */ after HINT")

            return self.expression(exp.Hint, expressions=hints)

        return None

    def _parse_into(self) -> t.Optional[exp.Into]:
        if not self._match(TokenType.INTO):
            return None

        temp = self._match(TokenType.TEMPORARY)
        unlogged = self._match_text_seq("UNLOGGED")
        self._match(TokenType.TABLE)

        return self.expression(
            exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged
        )

    def _parse_from(
        self, joins: bool = False, skip_from_token: bool = False
    ) -> t.Optional[exp.From]:
        if not skip_from_token and not self._match(TokenType.FROM):
            return None

        return self.expression(
            exp.From, comments=self._prev_comments, this=self._parse_table(joins=joins)
        )

    def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]:
        if not self._match(TokenType.MATCH_RECOGNIZE):
            return None

        self._match_l_paren()

        partition = self._parse_partition_by()
        order = self._parse_order()
        measures = self._parse_expressions() if self._match_text_seq("MEASURES") else None

        if self._match_text_seq("ONE", "ROW", "PER", "MATCH"):
            rows = exp.var("ONE ROW PER MATCH")
        elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"):
            text = "ALL ROWS PER MATCH"
            if self._match_text_seq("SHOW", "EMPTY", "MATCHES"):
                text += " SHOW EMPTY MATCHES"
            elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"):
                text += " OMIT EMPTY MATCHES"
            elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"):
                text += " WITH UNMATCHED ROWS"
            rows = exp.var(text)
        else:
            rows = None

        if self._match_text_seq("AFTER", "MATCH", "SKIP"):
            text = "AFTER MATCH SKIP"
            if self._match_text_seq("PAST", "LAST", "ROW"):
                text += " PAST LAST ROW"
            elif self._match_text_seq("TO", "NEXT", "ROW"):
                text += " TO NEXT ROW"
            elif self._match_text_seq("TO", "FIRST"):
                text += f" TO FIRST {self._advance_any().text}"  # type: ignore
            elif self._match_text_seq("TO", "LAST"):
                text += f" TO LAST {self._advance_any().text}"  # type: ignore
            after = exp.var(text)
        else:
            after = None

        if self._match_text_seq("PATTERN"):
            self._match_l_paren()

            if not self._curr:
                self.raise_error("Expecting )", self._curr)

            paren = 1
            start = self._curr

            while self._curr and paren > 0:
                if self._curr.token_type == TokenType.L_PAREN:
                    paren += 1
                if self._curr.token_type == TokenType.R_PAREN:
                    paren -= 1

                end = self._prev
                self._advance()

            if paren > 0:
                self.raise_error("Expecting )", self._curr)

            pattern = exp.var(self._find_sql(start, end))
        else:
            pattern = None

        define = (
            self._parse_csv(
                lambda: self.expression(
                    exp.Alias,
                    alias=self._parse_id_var(any_token=True),
                    this=self._match(TokenType.ALIAS) and self._parse_conjunction(),
                )
            )
            if self._match_text_seq("DEFINE")
            else None
        )

        self._match_r_paren()

        return self.expression(
            exp.MatchRecognize,
            partition_by=partition,
            order=order,
            measures=measures,
            rows=rows,
            after=after,
            pattern=pattern,
            define=define,
            alias=self._parse_table_alias(),
        )

    def _parse_lateral(self) -> t.Optional[exp.Lateral]:
        outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY)
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY)

        if outer_apply or cross_apply:
            this = self._parse_select(table=True)
            view = None
            outer = not cross_apply
        elif self._match(TokenType.LATERAL):
            this = self._parse_select(table=True)
            view = self._match(TokenType.VIEW)
            outer = self._match(TokenType.OUTER)
        else:
            return None

        if not this:
            this = (
                self._parse_unnest()
                or self._parse_function()
                or self._parse_id_var(any_token=False)
            )

            while self._match(TokenType.DOT):
                this = exp.Dot(
                    this=this,
                    expression=self._parse_function() or self._parse_id_var(any_token=False),
                )

        if view:
            table = self._parse_id_var(any_token=False)
            columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else []
            table_alias: t.Optional[exp.TableAlias] = self.expression(
                exp.TableAlias, this=table, columns=columns
            )
        elif isinstance(this, (exp.Subquery, exp.Unnest)) and this.alias:
            # We move the alias from the lateral's child node to the lateral itself
            table_alias = this.args["alias"].pop()
        else:
            table_alias = self._parse_table_alias()

        return self.expression(exp.Lateral, this=this, view=view, outer=outer, alias=table_alias)

    def _parse_join_parts(
        self,
    ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]:
        return (
            self._match_set(self.JOIN_METHODS) and self._prev,
            self._match_set(self.JOIN_SIDES) and self._prev,
            self._match_set(self.JOIN_KINDS) and self._prev,
        )
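
A hedged sketch of _parse_lateral (the SQL below is an assumed example; exact support depends on the dialect): a LATERAL subquery in a join position should come back as an exp.Lateral node:

    import sqlglot
    from sqlglot import exp

    q = sqlglot.parse_one(
        "SELECT * FROM t CROSS JOIN LATERAL (SELECT t.x) AS s(y)", read="postgres"
    )
    print(q.find(exp.Lateral) is not None)  # True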
kwargs["this"].set("joins", [join] if join else None) 2505 2506 comments = [c for token in (method, side, kind) if token for c in token.comments] 2507 return self.expression(exp.Join, comments=comments, **kwargs) 2508 2509 def _parse_opclass(self) -> t.Optional[exp.Expression]: 2510 this = self._parse_conjunction() 2511 if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False): 2512 return this 2513 2514 opclass = self._parse_var(any_token=True) 2515 if opclass: 2516 return self.expression(exp.Opclass, this=this, expression=opclass) 2517 2518 return this 2519 2520 def _parse_index( 2521 self, 2522 index: t.Optional[exp.Expression] = None, 2523 ) -> t.Optional[exp.Index]: 2524 if index: 2525 unique = None 2526 primary = None 2527 amp = None 2528 2529 self._match(TokenType.ON) 2530 self._match(TokenType.TABLE) # hive 2531 table = self._parse_table_parts(schema=True) 2532 else: 2533 unique = self._match(TokenType.UNIQUE) 2534 primary = self._match_text_seq("PRIMARY") 2535 amp = self._match_text_seq("AMP") 2536 2537 if not self._match(TokenType.INDEX): 2538 return None 2539 2540 index = self._parse_id_var() 2541 table = None 2542 2543 using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None 2544 2545 if self._match(TokenType.L_PAREN, advance=False): 2546 columns = self._parse_wrapped_csv(lambda: self._parse_ordered(self._parse_opclass)) 2547 else: 2548 columns = None 2549 2550 return self.expression( 2551 exp.Index, 2552 this=index, 2553 table=table, 2554 using=using, 2555 columns=columns, 2556 unique=unique, 2557 primary=primary, 2558 amp=amp, 2559 partition_by=self._parse_partition_by(), 2560 where=self._parse_where(), 2561 ) 2562 2563 def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]: 2564 hints: t.List[exp.Expression] = [] 2565 if self._match_pair(TokenType.WITH, TokenType.L_PAREN): 2566 # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16 2567 hints.append( 2568 self.expression( 2569 exp.WithTableHint, 2570 expressions=self._parse_csv( 2571 lambda: self._parse_function() or self._parse_var(any_token=True) 2572 ), 2573 ) 2574 ) 2575 self._match_r_paren() 2576 else: 2577 # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html 2578 while self._match_set(self.TABLE_INDEX_HINT_TOKENS): 2579 hint = exp.IndexTableHint(this=self._prev.text.upper()) 2580 2581 self._match_texts({"INDEX", "KEY"}) 2582 if self._match(TokenType.FOR): 2583 hint.set("target", self._advance_any() and self._prev.text.upper()) 2584 2585 hint.set("expressions", self._parse_wrapped_id_vars()) 2586 hints.append(hint) 2587 2588 return hints or None 2589 2590 def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]: 2591 return ( 2592 (not schema and self._parse_function(optional_parens=False)) 2593 or self._parse_id_var(any_token=False) 2594 or self._parse_string_as_identifier() 2595 or self._parse_placeholder() 2596 ) 2597 2598 def _parse_table_parts(self, schema: bool = False) -> exp.Table: 2599 catalog = None 2600 db = None 2601 table = self._parse_table_part(schema=schema) 2602 2603 while self._match(TokenType.DOT): 2604 if catalog: 2605 # This allows nesting the table in arbitrarily many dot expressions if needed 2606 table = self.expression( 2607 exp.Dot, this=table, expression=self._parse_table_part(schema=schema) 2608 ) 2609 else: 2610 catalog = db 2611 db = table 2612 table = self._parse_table_part(schema=schema) 2613 2614 if not table: 2615 self.raise_error(f"Expected table name but got 
{self._curr}") 2616 2617 return self.expression( 2618 exp.Table, this=table, db=db, catalog=catalog, pivots=self._parse_pivots() 2619 ) 2620 2621 def _parse_table( 2622 self, 2623 schema: bool = False, 2624 joins: bool = False, 2625 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 2626 parse_bracket: bool = False, 2627 ) -> t.Optional[exp.Expression]: 2628 lateral = self._parse_lateral() 2629 if lateral: 2630 return lateral 2631 2632 unnest = self._parse_unnest() 2633 if unnest: 2634 return unnest 2635 2636 values = self._parse_derived_table_values() 2637 if values: 2638 return values 2639 2640 subquery = self._parse_select(table=True) 2641 if subquery: 2642 if not subquery.args.get("pivots"): 2643 subquery.set("pivots", self._parse_pivots()) 2644 return subquery 2645 2646 bracket = parse_bracket and self._parse_bracket(None) 2647 bracket = self.expression(exp.Table, this=bracket) if bracket else None 2648 this = t.cast( 2649 exp.Expression, bracket or self._parse_bracket(self._parse_table_parts(schema=schema)) 2650 ) 2651 2652 if schema: 2653 return self._parse_schema(this=this) 2654 2655 version = self._parse_version() 2656 2657 if version: 2658 this.set("version", version) 2659 2660 if self.ALIAS_POST_TABLESAMPLE: 2661 table_sample = self._parse_table_sample() 2662 2663 alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 2664 if alias: 2665 this.set("alias", alias) 2666 2667 if self._match_text_seq("AT"): 2668 this.set("index", self._parse_id_var()) 2669 2670 this.set("hints", self._parse_table_hints()) 2671 2672 if not this.args.get("pivots"): 2673 this.set("pivots", self._parse_pivots()) 2674 2675 if not self.ALIAS_POST_TABLESAMPLE: 2676 table_sample = self._parse_table_sample() 2677 2678 if table_sample: 2679 table_sample.set("this", this) 2680 this = table_sample 2681 2682 if joins: 2683 for join in iter(self._parse_join, None): 2684 this.append("joins", join) 2685 2686 return this 2687 2688 def _parse_version(self) -> t.Optional[exp.Version]: 2689 if self._match(TokenType.TIMESTAMP_SNAPSHOT): 2690 this = "TIMESTAMP" 2691 elif self._match(TokenType.VERSION_SNAPSHOT): 2692 this = "VERSION" 2693 else: 2694 return None 2695 2696 if self._match_set((TokenType.FROM, TokenType.BETWEEN)): 2697 kind = self._prev.text.upper() 2698 start = self._parse_bitwise() 2699 self._match_texts(("TO", "AND")) 2700 end = self._parse_bitwise() 2701 expression: t.Optional[exp.Expression] = self.expression( 2702 exp.Tuple, expressions=[start, end] 2703 ) 2704 elif self._match_text_seq("CONTAINED", "IN"): 2705 kind = "CONTAINED IN" 2706 expression = self.expression( 2707 exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise) 2708 ) 2709 elif self._match(TokenType.ALL): 2710 kind = "ALL" 2711 expression = None 2712 else: 2713 self._match_text_seq("AS", "OF") 2714 kind = "AS OF" 2715 expression = self._parse_type() 2716 2717 return self.expression(exp.Version, this=this, expression=expression, kind=kind) 2718 2719 def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]: 2720 if not self._match(TokenType.UNNEST): 2721 return None 2722 2723 expressions = self._parse_wrapped_csv(self._parse_type) 2724 offset = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 2725 2726 alias = self._parse_table_alias() if with_alias else None 2727 2728 if alias: 2729 if self.UNNEST_COLUMN_ONLY: 2730 if alias.args.get("columns"): 2731 self.raise_error("Unexpected extra column alias in unnest.") 2732 2733 alias.set("columns", [alias.this]) 2734 
alias.set("this", None) 2735 2736 columns = alias.args.get("columns") or [] 2737 if offset and len(expressions) < len(columns): 2738 offset = columns.pop() 2739 2740 if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET): 2741 self._match(TokenType.ALIAS) 2742 offset = self._parse_id_var( 2743 any_token=False, tokens=self.UNNEST_OFFSET_ALIAS_TOKENS 2744 ) or exp.to_identifier("offset") 2745 2746 return self.expression(exp.Unnest, expressions=expressions, alias=alias, offset=offset) 2747 2748 def _parse_derived_table_values(self) -> t.Optional[exp.Values]: 2749 is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES) 2750 if not is_derived and not self._match(TokenType.VALUES): 2751 return None 2752 2753 expressions = self._parse_csv(self._parse_value) 2754 alias = self._parse_table_alias() 2755 2756 if is_derived: 2757 self._match_r_paren() 2758 2759 return self.expression( 2760 exp.Values, expressions=expressions, alias=alias or self._parse_table_alias() 2761 ) 2762 2763 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 2764 if not self._match(TokenType.TABLE_SAMPLE) and not ( 2765 as_modifier and self._match_text_seq("USING", "SAMPLE") 2766 ): 2767 return None 2768 2769 bucket_numerator = None 2770 bucket_denominator = None 2771 bucket_field = None 2772 percent = None 2773 rows = None 2774 size = None 2775 seed = None 2776 2777 kind = ( 2778 self._prev.text if self._prev.token_type == TokenType.TABLE_SAMPLE else "USING SAMPLE" 2779 ) 2780 method = self._parse_var(tokens=(TokenType.ROW,)) 2781 2782 matched_l_paren = self._match(TokenType.L_PAREN) 2783 2784 if self.TABLESAMPLE_CSV: 2785 num = None 2786 expressions = self._parse_csv(self._parse_primary) 2787 else: 2788 expressions = None 2789 num = ( 2790 self._parse_factor() 2791 if self._match(TokenType.NUMBER, advance=False) 2792 else self._parse_primary() 2793 ) 2794 2795 if self._match_text_seq("BUCKET"): 2796 bucket_numerator = self._parse_number() 2797 self._match_text_seq("OUT", "OF") 2798 bucket_denominator = bucket_denominator = self._parse_number() 2799 self._match(TokenType.ON) 2800 bucket_field = self._parse_field() 2801 elif self._match_set((TokenType.PERCENT, TokenType.MOD)): 2802 percent = num 2803 elif self._match(TokenType.ROWS): 2804 rows = num 2805 elif num: 2806 size = num 2807 2808 if matched_l_paren: 2809 self._match_r_paren() 2810 2811 if self._match(TokenType.L_PAREN): 2812 method = self._parse_var() 2813 seed = self._match(TokenType.COMMA) and self._parse_number() 2814 self._match_r_paren() 2815 elif self._match_texts(("SEED", "REPEATABLE")): 2816 seed = self._parse_wrapped(self._parse_number) 2817 2818 return self.expression( 2819 exp.TableSample, 2820 expressions=expressions, 2821 method=method, 2822 bucket_numerator=bucket_numerator, 2823 bucket_denominator=bucket_denominator, 2824 bucket_field=bucket_field, 2825 percent=percent, 2826 rows=rows, 2827 size=size, 2828 seed=seed, 2829 kind=kind, 2830 ) 2831 2832 def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]: 2833 return list(iter(self._parse_pivot, None)) or None 2834 2835 def _parse_joins(self) -> t.Optional[t.List[exp.Join]]: 2836 return list(iter(self._parse_join, None)) or None 2837 2838 # https://duckdb.org/docs/sql/statements/pivot 2839 def _parse_simplified_pivot(self) -> exp.Pivot: 2840 def _parse_on() -> t.Optional[exp.Expression]: 2841 this = self._parse_bitwise() 2842 return self._parse_in(this) if self._match(TokenType.IN) else this 2843 2844 this = self._parse_table() 2845 

    # https://duckdb.org/docs/sql/statements/pivot
    def _parse_simplified_pivot(self) -> exp.Pivot:
        def _parse_on() -> t.Optional[exp.Expression]:
            this = self._parse_bitwise()
            return self._parse_in(this) if self._match(TokenType.IN) else this

        this = self._parse_table()
        expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on)
        using = self._match(TokenType.USING) and self._parse_csv(
            lambda: self._parse_alias(self._parse_function())
        )
        group = self._parse_group()
        return self.expression(
            exp.Pivot, this=this, expressions=expressions, using=using, group=group
        )

    def _parse_pivot(self) -> t.Optional[exp.Pivot]:
        index = self._index
        include_nulls = None

        if self._match(TokenType.PIVOT):
            unpivot = False
        elif self._match(TokenType.UNPIVOT):
            unpivot = True

            # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax
            if self._match_text_seq("INCLUDE", "NULLS"):
                include_nulls = True
            elif self._match_text_seq("EXCLUDE", "NULLS"):
                include_nulls = False
        else:
            return None

        expressions = []
        field = None

        if not self._match(TokenType.L_PAREN):
            self._retreat(index)
            return None

        if unpivot:
            expressions = self._parse_csv(self._parse_column)
        else:
            expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function()))

        if not expressions:
            self.raise_error("Failed to parse PIVOT's aggregation list")

        if not self._match(TokenType.FOR):
            self.raise_error("Expecting FOR")

        value = self._parse_column()

        if not self._match(TokenType.IN):
            self.raise_error("Expecting IN")

        field = self._parse_in(value, alias=True)

        self._match_r_paren()

        pivot = self.expression(
            exp.Pivot,
            expressions=expressions,
            field=field,
            unpivot=unpivot,
            include_nulls=include_nulls,
        )

        if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False):
            pivot.set("alias", self._parse_table_alias())

        if not unpivot:
            names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions))

            columns: t.List[exp.Expression] = []
            for fld in pivot.args["field"].expressions:
                field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name
                for name in names:
                    if self.PREFIXED_PIVOT_COLUMNS:
                        name = f"{name}_{field_name}" if name else field_name
                    else:
                        name = f"{field_name}_{name}" if name else field_name

                    columns.append(exp.to_identifier(name))

            pivot.set("columns", columns)

        return pivot

    def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]:
        return [agg.alias for agg in aggregations]

    def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]:
        if not skip_where_token and not self._match(TokenType.WHERE):
            return None

        return self.expression(
            exp.Where, comments=self._prev_comments, this=self._parse_conjunction()
        )
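
A hedged example of _parse_pivot (assumed Snowflake input): the aggregation list, FOR column and IN values land on an exp.Pivot attached to the table:

    import sqlglot
    from sqlglot import exp

    p = sqlglot.parse_one(
        "SELECT * FROM t PIVOT(SUM(v) FOR k IN ('a', 'b'))", read="snowflake"
    ).find(exp.Pivot)
    print(len(p.expressions), len(p.args["field"].expressions))  # 1 2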

    def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]:
        if not skip_group_by_token and not self._match(TokenType.GROUP_BY):
            return None

        elements = defaultdict(list)

        if self._match(TokenType.ALL):
            return self.expression(exp.Group, all=True)

        while True:
            expressions = self._parse_csv(self._parse_conjunction)
            if expressions:
                elements["expressions"].extend(expressions)

            grouping_sets = self._parse_grouping_sets()
            if grouping_sets:
                elements["grouping_sets"].extend(grouping_sets)

            rollup = None
            cube = None
            totals = None

            index = self._index
            with_ = self._match(TokenType.WITH)
            if self._match(TokenType.ROLLUP):
                rollup = with_ or self._parse_wrapped_csv(self._parse_column)
                elements["rollup"].extend(ensure_list(rollup))

            if self._match(TokenType.CUBE):
                cube = with_ or self._parse_wrapped_csv(self._parse_column)
                elements["cube"].extend(ensure_list(cube))

            if self._match_text_seq("TOTALS"):
                totals = True
                elements["totals"] = True  # type: ignore

            if not (grouping_sets or rollup or cube or totals):
                if with_:
                    self._retreat(index)
                break

        return self.expression(exp.Group, **elements)  # type: ignore

    def _parse_grouping_sets(self) -> t.Optional[t.List[exp.Expression]]:
        if not self._match(TokenType.GROUPING_SETS):
            return None

        return self._parse_wrapped_csv(self._parse_grouping_set)

    def _parse_grouping_set(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.L_PAREN):
            grouping_set = self._parse_csv(self._parse_column)
            self._match_r_paren()
            return self.expression(exp.Tuple, expressions=grouping_set)

        return self._parse_column()

    def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]:
        if not skip_having_token and not self._match(TokenType.HAVING):
            return None
        return self.expression(exp.Having, this=self._parse_conjunction())

    def _parse_qualify(self) -> t.Optional[exp.Qualify]:
        if not self._match(TokenType.QUALIFY):
            return None
        return self.expression(exp.Qualify, this=self._parse_conjunction())

    def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]:
        if skip_start_token:
            start = None
        elif self._match(TokenType.START_WITH):
            start = self._parse_conjunction()
        else:
            return None

        self._match(TokenType.CONNECT_BY)
        self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression(
            exp.Prior, this=self._parse_bitwise()
        )
        connect = self._parse_conjunction()
        self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR")

        if not start and self._match(TokenType.START_WITH):
            start = self._parse_conjunction()

        return self.expression(exp.Connect, start=start, connect=connect)

    def _parse_order(
        self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False
    ) -> t.Optional[exp.Expression]:
        if not skip_order_token and not self._match(TokenType.ORDER_BY):
            return this

        return self.expression(
            exp.Order, this=this, expressions=self._parse_csv(self._parse_ordered)
        )

    def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]:
        if not self._match(token):
            return None
        return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered))

    def _parse_ordered(self, parse_method: t.Optional[t.Callable] = None) -> exp.Ordered:
        this = parse_method() if parse_method else self._parse_conjunction()

        asc = self._match(TokenType.ASC)
        desc = self._match(TokenType.DESC) or (asc and False)

        is_nulls_first = self._match_text_seq("NULLS", "FIRST")
        is_nulls_last = self._match_text_seq("NULLS", "LAST")

        nulls_first = is_nulls_first or False
        explicitly_null_ordered = is_nulls_first or is_nulls_last

        if (
            not explicitly_null_ordered
            and (
                (not desc and self.NULL_ORDERING == "nulls_are_small")
                or (desc and self.NULL_ORDERING != "nulls_are_small")
            )
            and self.NULL_ORDERING != "nulls_are_last"
        ):
            nulls_first = True

        return self.expression(exp.Ordered, this=this, desc=desc, nulls_first=nulls_first)
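
For instance (assumed input), explicit NULLS ordering short-circuits the NULL_ORDERING defaults in _parse_ordered:

    import sqlglot
    from sqlglot import exp

    o = sqlglot.parse_one("SELECT x FROM t ORDER BY x DESC NULLS FIRST").find(exp.Ordered)
    print(o.args["desc"], o.args["nulls_first"])  # True True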

    def _parse_limit(
        self, this: t.Optional[exp.Expression] = None, top: bool = False
    ) -> t.Optional[exp.Expression]:
        if self._match(TokenType.TOP if top else TokenType.LIMIT):
            comments = self._prev_comments
            if top:
                limit_paren = self._match(TokenType.L_PAREN)
                expression = self._parse_number()

                if limit_paren:
                    self._match_r_paren()
            else:
                expression = self._parse_term()

            if self._match(TokenType.COMMA):
                offset = expression
                expression = self._parse_term()
            else:
                offset = None

            limit_exp = self.expression(
                exp.Limit, this=this, expression=expression, offset=offset, comments=comments
            )

            return limit_exp

        if self._match(TokenType.FETCH):
            direction = self._match_set((TokenType.FIRST, TokenType.NEXT))
            direction = self._prev.text if direction else "FIRST"

            count = self._parse_field(tokens=self.FETCH_TOKENS)
            percent = self._match(TokenType.PERCENT)

            self._match_set((TokenType.ROW, TokenType.ROWS))

            only = self._match_text_seq("ONLY")
            with_ties = self._match_text_seq("WITH", "TIES")

            if only and with_ties:
                self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause")

            return self.expression(
                exp.Fetch,
                direction=direction,
                count=count,
                percent=percent,
                with_ties=with_ties,
            )

        return this

    def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        if not self._match(TokenType.OFFSET):
            return this

        count = self._parse_term()
        self._match_set((TokenType.ROW, TokenType.ROWS))
        return self.expression(exp.Offset, this=this, expression=count)

    def _parse_locks(self) -> t.List[exp.Lock]:
        locks = []
        while True:
            if self._match_text_seq("FOR", "UPDATE"):
                update = True
            elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq(
                "LOCK", "IN", "SHARE", "MODE"
            ):
                update = False
            else:
                break

            expressions = None
            if self._match_text_seq("OF"):
                expressions = self._parse_csv(lambda: self._parse_table(schema=True))

            wait: t.Optional[bool | exp.Expression] = None
            if self._match_text_seq("NOWAIT"):
                wait = True
            elif self._match_text_seq("WAIT"):
                wait = self._parse_primary()
            elif self._match_text_seq("SKIP", "LOCKED"):
                wait = False

            locks.append(
                self.expression(exp.Lock, update=update, expressions=expressions, wait=wait)
            )

        return locks

    def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if not self._match_set(self.SET_OPERATIONS):
            return this

        token_type = self._prev.token_type

        if token_type == TokenType.UNION:
            expression = exp.Union
        elif token_type == TokenType.EXCEPT:
            expression = exp.Except
        else:
            expression = exp.Intersect

        return self.expression(
            expression,
            comments=self._prev.comments,
            this=this,
            distinct=self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL),
            by_name=self._match_text_seq("BY", "NAME"),
            expression=self._parse_set_operations(self._parse_select(nested=True)),
        )
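
Example (assumed input): _parse_set_operations records whether the operation is DISTINCT (the default when ALL is absent):

    import sqlglot
    from sqlglot import exp

    u = sqlglot.parse_one("SELECT 1 UNION ALL SELECT 2")
    print(isinstance(u, exp.Union), u.args["distinct"])  # True False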
3173 ) 3174 3175 def _parse_expression(self) -> t.Optional[exp.Expression]: 3176 return self._parse_alias(self._parse_conjunction()) 3177 3178 def _parse_conjunction(self) -> t.Optional[exp.Expression]: 3179 return self._parse_tokens(self._parse_equality, self.CONJUNCTION) 3180 3181 def _parse_equality(self) -> t.Optional[exp.Expression]: 3182 return self._parse_tokens(self._parse_comparison, self.EQUALITY) 3183 3184 def _parse_comparison(self) -> t.Optional[exp.Expression]: 3185 return self._parse_tokens(self._parse_range, self.COMPARISON) 3186 3187 def _parse_range(self) -> t.Optional[exp.Expression]: 3188 this = self._parse_bitwise() 3189 negate = self._match(TokenType.NOT) 3190 3191 if self._match_set(self.RANGE_PARSERS): 3192 expression = self.RANGE_PARSERS[self._prev.token_type](self, this) 3193 if not expression: 3194 return this 3195 3196 this = expression 3197 elif self._match(TokenType.ISNULL): 3198 this = self.expression(exp.Is, this=this, expression=exp.Null()) 3199 3200 # Postgres supports ISNULL and NOTNULL for conditions. 3201 # https://blog.andreiavram.ro/postgresql-null-composite-type/ 3202 if self._match(TokenType.NOTNULL): 3203 this = self.expression(exp.Is, this=this, expression=exp.Null()) 3204 this = self.expression(exp.Not, this=this) 3205 3206 if negate: 3207 this = self.expression(exp.Not, this=this) 3208 3209 if self._match(TokenType.IS): 3210 this = self._parse_is(this) 3211 3212 return this 3213 3214 def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3215 index = self._index - 1 3216 negate = self._match(TokenType.NOT) 3217 3218 if self._match_text_seq("DISTINCT", "FROM"): 3219 klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ 3220 return self.expression(klass, this=this, expression=self._parse_conjunction()) 3221 3222 expression = self._parse_null() or self._parse_boolean() 3223 if not expression: 3224 self._retreat(index) 3225 return None 3226 3227 this = self.expression(exp.Is, this=this, expression=expression) 3228 return self.expression(exp.Not, this=this) if negate else this 3229 3230 def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In: 3231 unnest = self._parse_unnest(with_alias=False) 3232 if unnest: 3233 this = self.expression(exp.In, this=this, unnest=unnest) 3234 elif self._match(TokenType.L_PAREN): 3235 expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias)) 3236 3237 if len(expressions) == 1 and isinstance(expressions[0], exp.Subqueryable): 3238 this = self.expression(exp.In, this=this, query=expressions[0]) 3239 else: 3240 this = self.expression(exp.In, this=this, expressions=expressions) 3241 3242 self._match_r_paren(this) 3243 else: 3244 this = self.expression(exp.In, this=this, field=self._parse_field()) 3245 3246 return this 3247 3248 def _parse_between(self, this: exp.Expression) -> exp.Between: 3249 low = self._parse_bitwise() 3250 self._match(TokenType.AND) 3251 high = self._parse_bitwise() 3252 return self.expression(exp.Between, this=this, low=low, high=high) 3253 3254 def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3255 if not self._match(TokenType.ESCAPE): 3256 return this 3257 return self.expression(exp.Escape, this=this, expression=self._parse_string()) 3258 3259 def _parse_interval(self) -> t.Optional[exp.Interval]: 3260 index = self._index 3261 3262 if not self._match(TokenType.INTERVAL): 3263 return None 3264 3265 if self._match(TokenType.STRING, advance=False): 3266 this = 

    def _parse_interval(self) -> t.Optional[exp.Interval]:
        index = self._index

        if not self._match(TokenType.INTERVAL):
            return None

        if self._match(TokenType.STRING, advance=False):
            this = self._parse_primary()
        else:
            this = self._parse_term()

        if not this:
            self._retreat(index)
            return None

        unit = self._parse_function() or self._parse_var(any_token=True)

        # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse
        # each INTERVAL expression into this canonical form so it's easy to transpile
        if this and this.is_number:
            this = exp.Literal.string(this.name)
        elif this and this.is_string:
            parts = this.name.split()

            if len(parts) == 2:
                if unit:
                    # This is not actually a unit, it's something else (e.g. a "window side")
                    unit = None
                    self._retreat(self._index - 1)

                this = exp.Literal.string(parts[0])
                unit = self.expression(exp.Var, this=parts[1])

        return self.expression(exp.Interval, this=this, unit=unit)

    def _parse_bitwise(self) -> t.Optional[exp.Expression]:
        this = self._parse_term()

        while True:
            if self._match_set(self.BITWISE):
                this = self.expression(
                    self.BITWISE[self._prev.token_type],
                    this=this,
                    expression=self._parse_term(),
                )
            elif self._match(TokenType.DQMARK):
                this = self.expression(exp.Coalesce, this=this, expressions=self._parse_term())
            elif self._match_pair(TokenType.LT, TokenType.LT):
                this = self.expression(
                    exp.BitwiseLeftShift, this=this, expression=self._parse_term()
                )
            elif self._match_pair(TokenType.GT, TokenType.GT):
                this = self.expression(
                    exp.BitwiseRightShift, this=this, expression=self._parse_term()
                )
            else:
                break

        return this

    def _parse_term(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_factor, self.TERM)

    def _parse_factor(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_unary, self.FACTOR)

    def _parse_unary(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.UNARY_PARSERS):
            return self.UNARY_PARSERS[self._prev.token_type](self)
        return self._parse_at_time_zone(self._parse_type())

    def _parse_type(self, parse_interval: bool = True) -> t.Optional[exp.Expression]:
        interval = parse_interval and self._parse_interval()
        if interval:
            return interval

        index = self._index
        data_type = self._parse_types(check_func=True, allow_identifiers=False)
        this = self._parse_column()

        if data_type:
            if isinstance(this, exp.Literal):
                parser = self.TYPE_LITERAL_PARSERS.get(data_type.this)
                if parser:
                    return parser(self, this, data_type)
                return self.expression(exp.Cast, this=this, to=data_type)
            if not data_type.expressions:
                self._retreat(index)
                return self._parse_column()
            return self._parse_column_ops(data_type)

        return this and self._parse_column_ops(this)

    def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]:
        this = self._parse_type()
        if not this:
            return None

        return self.expression(
            exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True)
        )
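
The canonicalisation in _parse_interval above can be seen by transpiling (assumed input); a single-string interval is split into a quoted number plus a unit:

    import sqlglot

    print(sqlglot.transpile("SELECT INTERVAL '5 day'")[0])  # SELECT INTERVAL '5' day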

    def _parse_types(
        self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True
    ) -> t.Optional[exp.Expression]:
        index = self._index

        prefix = self._match_text_seq("SYSUDTLIB", ".")

        if not self._match_set(self.TYPE_TOKENS):
            identifier = allow_identifiers and self._parse_id_var(
                any_token=False, tokens=(TokenType.VAR,)
            )

            if identifier:
                tokens = self._tokenizer.tokenize(identifier.name)

                if len(tokens) != 1:
                    self.raise_error("Unexpected identifier", self._prev)

                if tokens[0].token_type in self.TYPE_TOKENS:
                    self._prev = tokens[0]
                elif self.SUPPORTS_USER_DEFINED_TYPES:
                    type_name = identifier.name

                    while self._match(TokenType.DOT):
                        type_name = f"{type_name}.{self._advance_any() and self._prev.text}"

                    return exp.DataType.build(type_name, udt=True)
                else:
                    return None
            else:
                return None

        type_token = self._prev.token_type

        if type_token == TokenType.PSEUDO_TYPE:
            return self.expression(exp.PseudoType, this=self._prev.text)

        if type_token == TokenType.OBJECT_IDENTIFIER:
            return self.expression(exp.ObjectIdentifier, this=self._prev.text)

        nested = type_token in self.NESTED_TYPE_TOKENS
        is_struct = type_token in self.STRUCT_TYPE_TOKENS
        expressions = None
        maybe_func = False

        if self._match(TokenType.L_PAREN):
            if is_struct:
                expressions = self._parse_csv(self._parse_struct_types)
            elif nested:
                expressions = self._parse_csv(
                    lambda: self._parse_types(
                        check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                    )
                )
            elif type_token in self.ENUM_TYPE_TOKENS:
                expressions = self._parse_csv(self._parse_equality)
            else:
                expressions = self._parse_csv(self._parse_type_size)

            if not expressions or not self._match(TokenType.R_PAREN):
                self._retreat(index)
                return None

            maybe_func = True

        this: t.Optional[exp.Expression] = None
        values: t.Optional[t.List[exp.Expression]] = None

        if nested and self._match(TokenType.LT):
            if is_struct:
                expressions = self._parse_csv(self._parse_struct_types)
            else:
                expressions = self._parse_csv(
                    lambda: self._parse_types(
                        check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                    )
                )

            if not self._match(TokenType.GT):
                self.raise_error("Expecting >")

            if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)):
                values = self._parse_csv(self._parse_conjunction)
                self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN))

        if type_token in self.TIMESTAMPS:
            if self._match_text_seq("WITH", "TIME", "ZONE"):
                maybe_func = False
                tz_type = (
                    exp.DataType.Type.TIMETZ
                    if type_token in self.TIMES
                    else exp.DataType.Type.TIMESTAMPTZ
                )
                this = exp.DataType(this=tz_type, expressions=expressions)
            elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"):
                maybe_func = False
                this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions)
            elif self._match_text_seq("WITHOUT", "TIME", "ZONE"):
                maybe_func = False
        elif type_token == TokenType.INTERVAL:
            unit = self._parse_var()

            if self._match_text_seq("TO"):
                span = [exp.IntervalSpan(this=unit, expression=self._parse_var())]
            else:
                span = None

            if span or not unit:
                this = self.expression(
                    exp.DataType, this=exp.DataType.Type.INTERVAL, expressions=span
                )
            else:
                this = self.expression(exp.Interval, unit=unit)

        if maybe_func and check_func:
            index2 = self._index
            peek = self._parse_string()

            if not peek:
                self._retreat(index)
                return None

            self._retreat(index2)

        if not this:
            if self._match_text_seq("UNSIGNED"):
                unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token)
                if not unsigned_type_token:
                    self.raise_error(f"Cannot convert {type_token.value} to unsigned.")

                type_token = unsigned_type_token or type_token

            this = exp.DataType(
                this=exp.DataType.Type[type_token.value],
                expressions=expressions,
                nested=nested,
                values=values,
                prefix=prefix,
            )

        while self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET):
            this = exp.DataType(this=exp.DataType.Type.ARRAY, expressions=[this], nested=True)

        return this

    def _parse_struct_types(self) -> t.Optional[exp.Expression]:
        this = self._parse_type(parse_interval=False) or self._parse_id_var()
        self._match(TokenType.COLON)
        return self._parse_column_def(this)

    def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if not self._match_text_seq("AT", "TIME", "ZONE"):
            return this
        return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary())

    def _parse_column(self) -> t.Optional[exp.Expression]:
        this = self._parse_field()
        if isinstance(this, exp.Identifier):
            this = self.expression(exp.Column, this=this)
        elif not this:
            return self._parse_bracket(this)
        return self._parse_column_ops(this)
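
Example (assumed input): _parse_types feeds the "to" side of a CAST, including parenthesized size parameters:

    import sqlglot
    from sqlglot import exp

    cast = sqlglot.parse_one("SELECT CAST(x AS DECIMAL(10, 2))").find(exp.Cast)
    print(cast.to.sql())  # DECIMAL(10, 2)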
3549 # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules 3550 this = self._replace_columns_with_dots(this) 3551 3552 if op: 3553 this = op(self, this, field) 3554 elif isinstance(this, exp.Column) and not this.args.get("catalog"): 3555 this = self.expression( 3556 exp.Column, 3557 this=field, 3558 table=this.this, 3559 db=this.args.get("table"), 3560 catalog=this.args.get("db"), 3561 ) 3562 else: 3563 this = self.expression(exp.Dot, this=this, expression=field) 3564 this = self._parse_bracket(this) 3565 return this 3566 3567 def _parse_primary(self) -> t.Optional[exp.Expression]: 3568 if self._match_set(self.PRIMARY_PARSERS): 3569 token_type = self._prev.token_type 3570 primary = self.PRIMARY_PARSERS[token_type](self, self._prev) 3571 3572 if token_type == TokenType.STRING: 3573 expressions = [primary] 3574 while self._match(TokenType.STRING): 3575 expressions.append(exp.Literal.string(self._prev.text)) 3576 3577 if len(expressions) > 1: 3578 return self.expression(exp.Concat, expressions=expressions) 3579 3580 return primary 3581 3582 if self._match_pair(TokenType.DOT, TokenType.NUMBER): 3583 return exp.Literal.number(f"0.{self._prev.text}") 3584 3585 if self._match(TokenType.L_PAREN): 3586 comments = self._prev_comments 3587 query = self._parse_select() 3588 3589 if query: 3590 expressions = [query] 3591 else: 3592 expressions = self._parse_expressions() 3593 3594 this = self._parse_query_modifiers(seq_get(expressions, 0)) 3595 3596 if isinstance(this, exp.Subqueryable): 3597 this = self._parse_set_operations( 3598 self._parse_subquery(this=this, parse_alias=False) 3599 ) 3600 elif len(expressions) > 1: 3601 this = self.expression(exp.Tuple, expressions=expressions) 3602 else: 3603 this = self.expression(exp.Paren, this=self._parse_set_operations(this)) 3604 3605 if this: 3606 this.add_comments(comments) 3607 3608 self._match_r_paren(expression=this) 3609 return this 3610 3611 return None 3612 3613 def _parse_field( 3614 self, 3615 any_token: bool = False, 3616 tokens: t.Optional[t.Collection[TokenType]] = None, 3617 anonymous_func: bool = False, 3618 ) -> t.Optional[exp.Expression]: 3619 return ( 3620 self._parse_primary() 3621 or self._parse_function(anonymous=anonymous_func) 3622 or self._parse_id_var(any_token=any_token, tokens=tokens) 3623 ) 3624 3625 def _parse_function( 3626 self, 3627 functions: t.Optional[t.Dict[str, t.Callable]] = None, 3628 anonymous: bool = False, 3629 optional_parens: bool = True, 3630 ) -> t.Optional[exp.Expression]: 3631 # This allows us to also parse {fn <function>} syntax (Snowflake, MySQL support this) 3632 # See: https://community.snowflake.com/s/article/SQL-Escape-Sequences 3633 fn_syntax = False 3634 if ( 3635 self._match(TokenType.L_BRACE, advance=False) 3636 and self._next 3637 and self._next.text.upper() == "FN" 3638 ): 3639 self._advance(2) 3640 fn_syntax = True 3641 3642 func = self._parse_function_call( 3643 functions=functions, anonymous=anonymous, optional_parens=optional_parens 3644 ) 3645 3646 if fn_syntax: 3647 self._match(TokenType.R_BRACE) 3648 3649 return func 3650 3651 def _parse_function_call( 3652 self, 3653 functions: t.Optional[t.Dict[str, t.Callable]] = None, 3654 anonymous: bool = False, 3655 optional_parens: bool = True, 3656 ) -> t.Optional[exp.Expression]: 3657 if not self._curr: 3658 return None 3659 3660 token_type = self._curr.token_type 3661 this = self._curr.text 3662 upper = this.upper() 3663 3664 parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper) 3665 if 
optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS: 3666 self._advance() 3667 return parser(self) 3668 3669 if not self._next or self._next.token_type != TokenType.L_PAREN: 3670 if optional_parens and token_type in self.NO_PAREN_FUNCTIONS: 3671 self._advance() 3672 return self.expression(self.NO_PAREN_FUNCTIONS[token_type]) 3673 3674 return None 3675 3676 if token_type not in self.FUNC_TOKENS: 3677 return None 3678 3679 self._advance(2) 3680 3681 parser = self.FUNCTION_PARSERS.get(upper) 3682 if parser and not anonymous: 3683 this = parser(self) 3684 else: 3685 subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type) 3686 3687 if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH): 3688 this = self.expression(subquery_predicate, this=self._parse_select()) 3689 self._match_r_paren() 3690 return this 3691 3692 if functions is None: 3693 functions = self.FUNCTIONS 3694 3695 function = functions.get(upper) 3696 3697 alias = upper in self.FUNCTIONS_WITH_ALIASED_ARGS 3698 args = self._parse_csv(lambda: self._parse_lambda(alias=alias)) 3699 3700 if function and not anonymous: 3701 func = self.validate_expression(function(args), args) 3702 if not self.NORMALIZE_FUNCTIONS: 3703 func.meta["name"] = this 3704 this = func 3705 else: 3706 this = self.expression(exp.Anonymous, this=this, expressions=args) 3707 3708 self._match_r_paren(this) 3709 return self._parse_window(this) 3710 3711 def _parse_function_parameter(self) -> t.Optional[exp.Expression]: 3712 return self._parse_column_def(self._parse_id_var()) 3713 3714 def _parse_user_defined_function( 3715 self, kind: t.Optional[TokenType] = None 3716 ) -> t.Optional[exp.Expression]: 3717 this = self._parse_id_var() 3718 3719 while self._match(TokenType.DOT): 3720 this = self.expression(exp.Dot, this=this, expression=self._parse_id_var()) 3721 3722 if not self._match(TokenType.L_PAREN): 3723 return this 3724 3725 expressions = self._parse_csv(self._parse_function_parameter) 3726 self._match_r_paren() 3727 return self.expression( 3728 exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True 3729 ) 3730 3731 def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier: 3732 literal = self._parse_primary() 3733 if literal: 3734 return self.expression(exp.Introducer, this=token.text, expression=literal) 3735 3736 return self.expression(exp.Identifier, this=token.text) 3737 3738 def _parse_session_parameter(self) -> exp.SessionParameter: 3739 kind = None 3740 this = self._parse_id_var() or self._parse_primary() 3741 3742 if this and self._match(TokenType.DOT): 3743 kind = this.name 3744 this = self._parse_var() or self._parse_primary() 3745 3746 return self.expression(exp.SessionParameter, this=this, kind=kind) 3747 3748 def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]: 3749 index = self._index 3750 3751 if self._match(TokenType.L_PAREN): 3752 expressions = t.cast( 3753 t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_id_var) 3754 ) 3755 3756 if not self._match(TokenType.R_PAREN): 3757 self._retreat(index) 3758 else: 3759 expressions = [self._parse_id_var()] 3760 3761 if self._match_set(self.LAMBDAS): 3762 return self.LAMBDAS[self._prev.token_type](self, expressions) 3763 3764 self._retreat(index) 3765 3766 this: t.Optional[exp.Expression] 3767 3768 if self._match(TokenType.DISTINCT): 3769 this = self.expression( 3770 exp.Distinct, expressions=self._parse_csv(self._parse_conjunction) 3771 ) 3772 else: 3773 this = 
self._parse_select_or_expression(alias=alias) 3774 3775 return self._parse_limit(self._parse_order(self._parse_respect_or_ignore_nulls(this))) 3776 3777 def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 3778 index = self._index 3779 3780 if not self.errors: 3781 try: 3782 if self._parse_select(nested=True): 3783 return this 3784 except ParseError: 3785 pass 3786 finally: 3787 self.errors.clear() 3788 self._retreat(index) 3789 3790 if not self._match(TokenType.L_PAREN): 3791 return this 3792 3793 args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def()) 3794 3795 self._match_r_paren() 3796 return self.expression(exp.Schema, this=this, expressions=args) 3797 3798 def _parse_field_def(self) -> t.Optional[exp.Expression]: 3799 return self._parse_column_def(self._parse_field(any_token=True)) 3800 3801 def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3802 # column defs are not really columns, they're identifiers 3803 if isinstance(this, exp.Column): 3804 this = this.this 3805 3806 kind = self._parse_types(schema=True) 3807 3808 if self._match_text_seq("FOR", "ORDINALITY"): 3809 return self.expression(exp.ColumnDef, this=this, ordinality=True) 3810 3811 constraints: t.List[exp.Expression] = [] 3812 3813 if not kind and self._match(TokenType.ALIAS): 3814 constraints.append( 3815 self.expression( 3816 exp.ComputedColumnConstraint, 3817 this=self._parse_conjunction(), 3818 persisted=self._match_text_seq("PERSISTED"), 3819 not_null=self._match_pair(TokenType.NOT, TokenType.NULL), 3820 ) 3821 ) 3822 3823 while True: 3824 constraint = self._parse_column_constraint() 3825 if not constraint: 3826 break 3827 constraints.append(constraint) 3828 3829 if not kind and not constraints: 3830 return this 3831 3832 return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints) 3833 3834 def _parse_auto_increment( 3835 self, 3836 ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint: 3837 start = None 3838 increment = None 3839 3840 if self._match(TokenType.L_PAREN, advance=False): 3841 args = self._parse_wrapped_csv(self._parse_bitwise) 3842 start = seq_get(args, 0) 3843 increment = seq_get(args, 1) 3844 elif self._match_text_seq("START"): 3845 start = self._parse_bitwise() 3846 self._match_text_seq("INCREMENT") 3847 increment = self._parse_bitwise() 3848 3849 if start and increment: 3850 return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment) 3851 3852 return exp.AutoIncrementColumnConstraint() 3853 3854 def _parse_compress(self) -> exp.CompressColumnConstraint: 3855 if self._match(TokenType.L_PAREN, advance=False): 3856 return self.expression( 3857 exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise) 3858 ) 3859 3860 return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise()) 3861 3862 def _parse_generated_as_identity( 3863 self, 3864 ) -> exp.GeneratedAsIdentityColumnConstraint | exp.ComputedColumnConstraint: 3865 if self._match_text_seq("BY", "DEFAULT"): 3866 on_null = self._match_pair(TokenType.ON, TokenType.NULL) 3867 this = self.expression( 3868 exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null 3869 ) 3870 else: 3871 self._match_text_seq("ALWAYS") 3872 this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True) 3873 3874 self._match(TokenType.ALIAS) 3875 identity = self._match_text_seq("IDENTITY") 3876 3877 if 
self._match(TokenType.L_PAREN): 3878 if self._match(TokenType.START_WITH): 3879 this.set("start", self._parse_bitwise()) 3880 if self._match_text_seq("INCREMENT", "BY"): 3881 this.set("increment", self._parse_bitwise()) 3882 if self._match_text_seq("MINVALUE"): 3883 this.set("minvalue", self._parse_bitwise()) 3884 if self._match_text_seq("MAXVALUE"): 3885 this.set("maxvalue", self._parse_bitwise()) 3886 3887 if self._match_text_seq("CYCLE"): 3888 this.set("cycle", True) 3889 elif self._match_text_seq("NO", "CYCLE"): 3890 this.set("cycle", False) 3891 3892 if not identity: 3893 this.set("expression", self._parse_bitwise()) 3894 elif not this.args.get("start") and self._match(TokenType.NUMBER, advance=False): 3895 args = self._parse_csv(self._parse_bitwise) 3896 this.set("start", seq_get(args, 0)) 3897 this.set("increment", seq_get(args, 1)) 3898 3899 self._match_r_paren() 3900 3901 return this 3902 3903 def _parse_inline(self) -> exp.InlineLengthColumnConstraint: 3904 self._match_text_seq("LENGTH") 3905 return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise()) 3906 3907 def _parse_not_constraint( 3908 self, 3909 ) -> t.Optional[exp.Expression]: 3910 if self._match_text_seq("NULL"): 3911 return self.expression(exp.NotNullColumnConstraint) 3912 if self._match_text_seq("CASESPECIFIC"): 3913 return self.expression(exp.CaseSpecificColumnConstraint, not_=True) 3914 if self._match_text_seq("FOR", "REPLICATION"): 3915 return self.expression(exp.NotForReplicationColumnConstraint) 3916 return None 3917 3918 def _parse_column_constraint(self) -> t.Optional[exp.Expression]: 3919 if self._match(TokenType.CONSTRAINT): 3920 this = self._parse_id_var() 3921 else: 3922 this = None 3923 3924 if self._match_texts(self.CONSTRAINT_PARSERS): 3925 return self.expression( 3926 exp.ColumnConstraint, 3927 this=this, 3928 kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self), 3929 ) 3930 3931 return this 3932 3933 def _parse_constraint(self) -> t.Optional[exp.Expression]: 3934 if not self._match(TokenType.CONSTRAINT): 3935 return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS) 3936 3937 this = self._parse_id_var() 3938 expressions = [] 3939 3940 while True: 3941 constraint = self._parse_unnamed_constraint() or self._parse_function() 3942 if not constraint: 3943 break 3944 expressions.append(constraint) 3945 3946 return self.expression(exp.Constraint, this=this, expressions=expressions) 3947 3948 def _parse_unnamed_constraint( 3949 self, constraints: t.Optional[t.Collection[str]] = None 3950 ) -> t.Optional[exp.Expression]: 3951 if self._match(TokenType.IDENTIFIER, advance=False) or not self._match_texts( 3952 constraints or self.CONSTRAINT_PARSERS 3953 ): 3954 return None 3955 3956 constraint = self._prev.text.upper() 3957 if constraint not in self.CONSTRAINT_PARSERS: 3958 self.raise_error(f"No parser found for schema constraint {constraint}.") 3959 3960 return self.CONSTRAINT_PARSERS[constraint](self) 3961 3962 def _parse_unique(self) -> exp.UniqueColumnConstraint: 3963 self._match_text_seq("KEY") 3964 return self.expression( 3965 exp.UniqueColumnConstraint, 3966 this=self._parse_schema(self._parse_id_var(any_token=False)), 3967 index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text, 3968 ) 3969 3970 def _parse_key_constraint_options(self) -> t.List[str]: 3971 options = [] 3972 while True: 3973 if not self._curr: 3974 break 3975 3976 if self._match(TokenType.ON): 3977 action = None 3978 on = self._advance_any() and 
self._prev.text 3979 3980 if self._match_text_seq("NO", "ACTION"): 3981 action = "NO ACTION" 3982 elif self._match_text_seq("CASCADE"): 3983 action = "CASCADE" 3984 elif self._match_text_seq("RESTRICT"): 3985 action = "RESTRICT" 3986 elif self._match_pair(TokenType.SET, TokenType.NULL): 3987 action = "SET NULL" 3988 elif self._match_pair(TokenType.SET, TokenType.DEFAULT): 3989 action = "SET DEFAULT" 3990 else: 3991 self.raise_error("Invalid key constraint") 3992 3993 options.append(f"ON {on} {action}") 3994 elif self._match_text_seq("NOT", "ENFORCED"): 3995 options.append("NOT ENFORCED") 3996 elif self._match_text_seq("DEFERRABLE"): 3997 options.append("DEFERRABLE") 3998 elif self._match_text_seq("INITIALLY", "DEFERRED"): 3999 options.append("INITIALLY DEFERRED") 4000 elif self._match_text_seq("NORELY"): 4001 options.append("NORELY") 4002 elif self._match_text_seq("MATCH", "FULL"): 4003 options.append("MATCH FULL") 4004 else: 4005 break 4006 4007 return options 4008 4009 def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]: 4010 if match and not self._match(TokenType.REFERENCES): 4011 return None 4012 4013 expressions = None 4014 this = self._parse_table(schema=True) 4015 options = self._parse_key_constraint_options() 4016 return self.expression(exp.Reference, this=this, expressions=expressions, options=options) 4017 4018 def _parse_foreign_key(self) -> exp.ForeignKey: 4019 expressions = self._parse_wrapped_id_vars() 4020 reference = self._parse_references() 4021 options = {} 4022 4023 while self._match(TokenType.ON): 4024 if not self._match_set((TokenType.DELETE, TokenType.UPDATE)): 4025 self.raise_error("Expected DELETE or UPDATE") 4026 4027 kind = self._prev.text.lower() 4028 4029 if self._match_text_seq("NO", "ACTION"): 4030 action = "NO ACTION" 4031 elif self._match(TokenType.SET): 4032 self._match_set((TokenType.NULL, TokenType.DEFAULT)) 4033 action = "SET " + self._prev.text.upper() 4034 else: 4035 self._advance() 4036 action = self._prev.text.upper() 4037 4038 options[kind] = action 4039 4040 return self.expression( 4041 exp.ForeignKey, expressions=expressions, reference=reference, **options # type: ignore 4042 ) 4043 4044 def _parse_primary_key_part(self) -> t.Optional[exp.Expression]: 4045 return self._parse_field() 4046 4047 def _parse_primary_key( 4048 self, wrapped_optional: bool = False, in_props: bool = False 4049 ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey: 4050 desc = ( 4051 self._match_set((TokenType.ASC, TokenType.DESC)) 4052 and self._prev.token_type == TokenType.DESC 4053 ) 4054 4055 if not in_props and not self._match(TokenType.L_PAREN, advance=False): 4056 return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc) 4057 4058 expressions = self._parse_wrapped_csv( 4059 self._parse_primary_key_part, optional=wrapped_optional 4060 ) 4061 options = self._parse_key_constraint_options() 4062 return self.expression(exp.PrimaryKey, expressions=expressions, options=options) 4063 4064 def _parse_bracket(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4065 if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)): 4066 return this 4067 4068 bracket_kind = self._prev.token_type 4069 4070 if self._match(TokenType.COLON): 4071 expressions: t.List[exp.Expression] = [ 4072 self.expression(exp.Slice, expression=self._parse_conjunction()) 4073 ] 4074 else: 4075 expressions = self._parse_csv( 4076 lambda: self._parse_slice( 4077 self._parse_alias(self._parse_conjunction(), explicit=True) 4078 ) 4079 ) 4080 4081 if not 
self._match(TokenType.R_BRACKET) and bracket_kind == TokenType.L_BRACKET: 4082 self.raise_error("Expected ]") 4083 elif not self._match(TokenType.R_BRACE) and bracket_kind == TokenType.L_BRACE: 4084 self.raise_error("Expected }") 4085 4086 # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs 4087 if bracket_kind == TokenType.L_BRACE: 4088 this = self.expression(exp.Struct, expressions=expressions) 4089 elif not this or this.name.upper() == "ARRAY": 4090 this = self.expression(exp.Array, expressions=expressions) 4091 else: 4092 expressions = apply_index_offset(this, expressions, -self.INDEX_OFFSET) 4093 this = self.expression(exp.Bracket, this=this, expressions=expressions) 4094 4095 self._add_comments(this) 4096 return self._parse_bracket(this) 4097 4098 def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4099 if self._match(TokenType.COLON): 4100 return self.expression(exp.Slice, this=this, expression=self._parse_conjunction()) 4101 return this 4102 4103 def _parse_case(self) -> t.Optional[exp.Expression]: 4104 ifs = [] 4105 default = None 4106 4107 comments = self._prev_comments 4108 expression = self._parse_conjunction() 4109 4110 while self._match(TokenType.WHEN): 4111 this = self._parse_conjunction() 4112 self._match(TokenType.THEN) 4113 then = self._parse_conjunction() 4114 ifs.append(self.expression(exp.If, this=this, true=then)) 4115 4116 if self._match(TokenType.ELSE): 4117 default = self._parse_conjunction() 4118 4119 if not self._match(TokenType.END): 4120 self.raise_error("Expected END after CASE", self._prev) 4121 4122 return self._parse_window( 4123 self.expression(exp.Case, comments=comments, this=expression, ifs=ifs, default=default) 4124 ) 4125 4126 def _parse_if(self) -> t.Optional[exp.Expression]: 4127 if self._match(TokenType.L_PAREN): 4128 args = self._parse_csv(self._parse_conjunction) 4129 this = self.validate_expression(exp.If.from_arg_list(args), args) 4130 self._match_r_paren() 4131 else: 4132 index = self._index - 1 4133 condition = self._parse_conjunction() 4134 4135 if not condition: 4136 self._retreat(index) 4137 return None 4138 4139 self._match(TokenType.THEN) 4140 true = self._parse_conjunction() 4141 false = self._parse_conjunction() if self._match(TokenType.ELSE) else None 4142 self._match(TokenType.END) 4143 this = self.expression(exp.If, this=condition, true=true, false=false) 4144 4145 return self._parse_window(this) 4146 4147 def _parse_next_value_for(self) -> t.Optional[exp.Expression]: 4148 if not self._match_text_seq("VALUE", "FOR"): 4149 self._retreat(self._index - 1) 4150 return None 4151 4152 return self.expression( 4153 exp.NextValueFor, 4154 this=self._parse_column(), 4155 order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order), 4156 ) 4157 4158 def _parse_extract(self) -> exp.Extract: 4159 this = self._parse_function() or self._parse_var() or self._parse_type() 4160 4161 if self._match(TokenType.FROM): 4162 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 4163 4164 if not self._match(TokenType.COMMA): 4165 self.raise_error("Expected FROM or comma after EXTRACT", self._prev) 4166 4167 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 4168 4169 def _parse_any_value(self) -> exp.AnyValue: 4170 this = self._parse_lambda() 4171 is_max = None 4172 having = None 4173 4174 if self._match(TokenType.HAVING): 4175 self._match_texts(("MAX", "MIN")) 4176 is_max = self._prev.text == "MAX" 4177 having = 
self._parse_column() 4178 4179 return self.expression(exp.AnyValue, this=this, having=having, max=is_max) 4180 4181 def _parse_cast(self, strict: bool, safe: t.Optional[bool] = None) -> exp.Expression: 4182 this = self._parse_conjunction() 4183 4184 if not self._match(TokenType.ALIAS): 4185 if self._match(TokenType.COMMA): 4186 return self.expression(exp.CastToStrType, this=this, to=self._parse_string()) 4187 4188 self.raise_error("Expected AS after CAST") 4189 4190 fmt = None 4191 to = self._parse_types() 4192 4193 if not to: 4194 self.raise_error("Expected TYPE after CAST") 4195 elif isinstance(to, exp.Identifier): 4196 to = exp.DataType.build(to.name, udt=True) 4197 elif to.this == exp.DataType.Type.CHAR: 4198 if self._match(TokenType.CHARACTER_SET): 4199 to = self.expression(exp.CharacterSet, this=self._parse_var_or_string()) 4200 elif self._match(TokenType.FORMAT): 4201 fmt_string = self._parse_string() 4202 fmt = self._parse_at_time_zone(fmt_string) 4203 4204 if to.this in exp.DataType.TEMPORAL_TYPES: 4205 this = self.expression( 4206 exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime, 4207 this=this, 4208 format=exp.Literal.string( 4209 format_time( 4210 fmt_string.this if fmt_string else "", 4211 self.FORMAT_MAPPING or self.TIME_MAPPING, 4212 self.FORMAT_TRIE or self.TIME_TRIE, 4213 ) 4214 ), 4215 ) 4216 4217 if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime): 4218 this.set("zone", fmt.args["zone"]) 4219 4220 return this 4221 4222 return self.expression( 4223 exp.Cast if strict else exp.TryCast, this=this, to=to, format=fmt, safe=safe 4224 ) 4225 4226 def _parse_concat(self) -> t.Optional[exp.Expression]: 4227 args = self._parse_csv(self._parse_conjunction) 4228 if self.CONCAT_NULL_OUTPUTS_STRING: 4229 args = self._ensure_string_if_null(args) 4230 4231 # Some dialects (e.g. Trino) don't allow a single-argument CONCAT call, so when 4232 # we find such a call we replace it with its argument. 4233 if len(args) == 1: 4234 return args[0] 4235 4236 return self.expression( 4237 exp.Concat if self.STRICT_STRING_CONCAT else exp.SafeConcat, expressions=args 4238 ) 4239 4240 def _parse_concat_ws(self) -> t.Optional[exp.Expression]: 4241 args = self._parse_csv(self._parse_conjunction) 4242 if len(args) < 2: 4243 return self.expression(exp.ConcatWs, expressions=args) 4244 delim, *values = args 4245 if self.CONCAT_NULL_OUTPUTS_STRING: 4246 values = self._ensure_string_if_null(values) 4247 4248 return self.expression(exp.ConcatWs, expressions=[delim] + values) 4249 4250 def _parse_string_agg(self) -> exp.Expression: 4251 if self._match(TokenType.DISTINCT): 4252 args: t.List[t.Optional[exp.Expression]] = [ 4253 self.expression(exp.Distinct, expressions=[self._parse_conjunction()]) 4254 ] 4255 if self._match(TokenType.COMMA): 4256 args.extend(self._parse_csv(self._parse_conjunction)) 4257 else: 4258 args = self._parse_csv(self._parse_conjunction) # type: ignore 4259 4260 index = self._index 4261 if not self._match(TokenType.R_PAREN) and args: 4262 # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]]) 4263 # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n]) 4264 args[-1] = self._parse_limit(this=self._parse_order(this=args[-1])) 4265 return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1)) 4266 4267 # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]). 
4268 # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that 4269 # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them. 4270 if not self._match_text_seq("WITHIN", "GROUP"): 4271 self._retreat(index) 4272 return self.validate_expression(exp.GroupConcat.from_arg_list(args), args) 4273 4274 self._match_l_paren() # The corresponding match_r_paren will be called in parse_function (caller) 4275 order = self._parse_order(this=seq_get(args, 0)) 4276 return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1)) 4277 4278 def _parse_convert( 4279 self, strict: bool, safe: t.Optional[bool] = None 4280 ) -> t.Optional[exp.Expression]: 4281 this = self._parse_bitwise() 4282 4283 if self._match(TokenType.USING): 4284 to: t.Optional[exp.Expression] = self.expression( 4285 exp.CharacterSet, this=self._parse_var() 4286 ) 4287 elif self._match(TokenType.COMMA): 4288 to = self._parse_types() 4289 else: 4290 to = None 4291 4292 return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, safe=safe) 4293 4294 def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]: 4295 """ 4296 There are generally two variants of the DECODE function: 4297 4298 - DECODE(bin, charset) 4299 - DECODE(expression, search, result [, search, result] ... [, default]) 4300 4301 The second variant will always be parsed into a CASE expression. Note that NULL 4302 needs special treatment, since we need to explicitly check for it with `IS NULL`, 4303 instead of relying on pattern matching. 4304 """ 4305 args = self._parse_csv(self._parse_conjunction) 4306 4307 if len(args) < 3: 4308 return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1)) 4309 4310 expression, *expressions = args 4311 if not expression: 4312 return None 4313 4314 ifs = [] 4315 for search, result in zip(expressions[::2], expressions[1::2]): 4316 if not search or not result: 4317 return None 4318 4319 if isinstance(search, exp.Literal): 4320 ifs.append( 4321 exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result) 4322 ) 4323 elif isinstance(search, exp.Null): 4324 ifs.append( 4325 exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result) 4326 ) 4327 else: 4328 cond = exp.or_( 4329 exp.EQ(this=expression.copy(), expression=search), 4330 exp.and_( 4331 exp.Is(this=expression.copy(), expression=exp.Null()), 4332 exp.Is(this=search.copy(), expression=exp.Null()), 4333 copy=False, 4334 ), 4335 copy=False, 4336 ) 4337 ifs.append(exp.If(this=cond, true=result)) 4338 4339 return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None) 4340 4341 def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]: 4342 self._match_text_seq("KEY") 4343 key = self._parse_column() 4344 self._match_set((TokenType.COLON, TokenType.COMMA)) 4345 self._match_text_seq("VALUE") 4346 value = self._parse_bitwise() 4347 4348 if not key and not value: 4349 return None 4350 return self.expression(exp.JSONKeyValue, this=key, expression=value) 4351 4352 def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4353 if not this or not self._match_text_seq("FORMAT", "JSON"): 4354 return this 4355 4356 return self.expression(exp.FormatJson, this=this) 4357 4358 def _parse_on_handling(self, on: str, *values: str) -> t.Optional[str]: 4359 # Parses the "X ON Y" syntax, i.e. 
NULL ON NULL (Oracle, T-SQL) 4360 for value in values: 4361 if self._match_text_seq(value, "ON", on): 4362 return f"{value} ON {on}" 4363 4364 return None 4365 4366 def _parse_json_object(self) -> exp.JSONObject: 4367 star = self._parse_star() 4368 expressions = ( 4369 [star] 4370 if star 4371 else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value())) 4372 ) 4373 null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT") 4374 4375 unique_keys = None 4376 if self._match_text_seq("WITH", "UNIQUE"): 4377 unique_keys = True 4378 elif self._match_text_seq("WITHOUT", "UNIQUE"): 4379 unique_keys = False 4380 4381 self._match_text_seq("KEYS") 4382 4383 return_type = self._match_text_seq("RETURNING") and self._parse_format_json( 4384 self._parse_type() 4385 ) 4386 encoding = self._match_text_seq("ENCODING") and self._parse_var() 4387 4388 return self.expression( 4389 exp.JSONObject, 4390 expressions=expressions, 4391 null_handling=null_handling, 4392 unique_keys=unique_keys, 4393 return_type=return_type, 4394 encoding=encoding, 4395 ) 4396 4397 # Note: this is currently incomplete; it only implements the "JSON_value_column" part 4398 def _parse_json_column_def(self) -> exp.JSONColumnDef: 4399 if not self._match_text_seq("NESTED"): 4400 this = self._parse_id_var() 4401 kind = self._parse_types(allow_identifiers=False) 4402 nested = None 4403 else: 4404 this = None 4405 kind = None 4406 nested = True 4407 4408 path = self._match_text_seq("PATH") and self._parse_string() 4409 nested_schema = nested and self._parse_json_schema() 4410 4411 return self.expression( 4412 exp.JSONColumnDef, 4413 this=this, 4414 kind=kind, 4415 path=path, 4416 nested_schema=nested_schema, 4417 ) 4418 4419 def _parse_json_schema(self) -> exp.JSONSchema: 4420 self._match_text_seq("COLUMNS") 4421 return self.expression( 4422 exp.JSONSchema, 4423 expressions=self._parse_wrapped_csv(self._parse_json_column_def, optional=True), 4424 ) 4425 4426 def _parse_json_table(self) -> exp.JSONTable: 4427 this = self._parse_format_json(self._parse_bitwise()) 4428 path = self._match(TokenType.COMMA) and self._parse_string() 4429 error_handling = self._parse_on_handling("ERROR", "ERROR", "NULL") 4430 empty_handling = self._parse_on_handling("EMPTY", "ERROR", "NULL") 4431 schema = self._parse_json_schema() 4432 4433 return exp.JSONTable( 4434 this=this, 4435 schema=schema, 4436 path=path, 4437 error_handling=error_handling, 4438 empty_handling=empty_handling, 4439 ) 4440 4441 def _parse_logarithm(self) -> exp.Func: 4442 # Default argument order is base, expression 4443 args = self._parse_csv(self._parse_range) 4444 4445 if len(args) > 1: 4446 if not self.LOG_BASE_FIRST: 4447 args.reverse() 4448 return exp.Log.from_arg_list(args) 4449 4450 return self.expression( 4451 exp.Ln if self.LOG_DEFAULTS_TO_LN else exp.Log, this=seq_get(args, 0) 4452 ) 4453 4454 def _parse_match_against(self) -> exp.MatchAgainst: 4455 expressions = self._parse_csv(self._parse_column) 4456 4457 self._match_text_seq(")", "AGAINST", "(") 4458 4459 this = self._parse_string() 4460 4461 if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"): 4462 modifier = "IN NATURAL LANGUAGE MODE" 4463 if self._match_text_seq("WITH", "QUERY", "EXPANSION"): 4464 modifier = f"{modifier} WITH QUERY EXPANSION" 4465 elif self._match_text_seq("IN", "BOOLEAN", "MODE"): 4466 modifier = "IN BOOLEAN MODE" 4467 elif self._match_text_seq("WITH", "QUERY", "EXPANSION"): 4468 modifier = "WITH QUERY EXPANSION" 4469 else: 4470 modifier = None 4471 4472 return 
self.expression( 4473 exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier 4474 ) 4475 4476 # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16 4477 def _parse_open_json(self) -> exp.OpenJSON: 4478 this = self._parse_bitwise() 4479 path = self._match(TokenType.COMMA) and self._parse_string() 4480 4481 def _parse_open_json_column_def() -> exp.OpenJSONColumnDef: 4482 this = self._parse_field(any_token=True) 4483 kind = self._parse_types() 4484 path = self._parse_string() 4485 as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON) 4486 4487 return self.expression( 4488 exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json 4489 ) 4490 4491 expressions = None 4492 if self._match_pair(TokenType.R_PAREN, TokenType.WITH): 4493 self._match_l_paren() 4494 expressions = self._parse_csv(_parse_open_json_column_def) 4495 4496 return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions) 4497 4498 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 4499 args = self._parse_csv(self._parse_bitwise) 4500 4501 if self._match(TokenType.IN): 4502 return self.expression( 4503 exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0) 4504 ) 4505 4506 if haystack_first: 4507 haystack = seq_get(args, 0) 4508 needle = seq_get(args, 1) 4509 else: 4510 needle = seq_get(args, 0) 4511 haystack = seq_get(args, 1) 4512 4513 return self.expression( 4514 exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2) 4515 ) 4516 4517 def _parse_predict(self) -> exp.Predict: 4518 self._match_text_seq("MODEL") 4519 this = self._parse_table() 4520 4521 self._match(TokenType.COMMA) 4522 self._match_text_seq("TABLE") 4523 4524 return self.expression( 4525 exp.Predict, 4526 this=this, 4527 expression=self._parse_table(), 4528 params_struct=self._match(TokenType.COMMA) and self._parse_bitwise(), 4529 ) 4530 4531 def _parse_join_hint(self, func_name: str) -> exp.JoinHint: 4532 args = self._parse_csv(self._parse_table) 4533 return exp.JoinHint(this=func_name.upper(), expressions=args) 4534 4535 def _parse_substring(self) -> exp.Substring: 4536 # Postgres supports the form: substring(string [from int] [for int]) 4537 # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6 4538 4539 args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise)) 4540 4541 if self._match(TokenType.FROM): 4542 args.append(self._parse_bitwise()) 4543 if self._match(TokenType.FOR): 4544 args.append(self._parse_bitwise()) 4545 4546 return self.validate_expression(exp.Substring.from_arg_list(args), args) 4547 4548 def _parse_trim(self) -> exp.Trim: 4549 # https://www.w3resource.com/sql/character-functions/trim.php 4550 # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html 4551 4552 position = None 4553 collation = None 4554 expression = None 4555 4556 if self._match_texts(self.TRIM_TYPES): 4557 position = self._prev.text.upper() 4558 4559 this = self._parse_bitwise() 4560 if self._match_set((TokenType.FROM, TokenType.COMMA)): 4561 invert_order = self._prev.token_type == TokenType.FROM or self.TRIM_PATTERN_FIRST 4562 expression = self._parse_bitwise() 4563 4564 if invert_order: 4565 this, expression = expression, this 4566 4567 if self._match(TokenType.COLLATE): 4568 collation = self._parse_bitwise() 4569 4570 return self.expression( 4571 exp.Trim, this=this, position=position, expression=expression, collation=collation 4572 ) 4573 4574 def 
_parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]: 4575 return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window) 4576 4577 def _parse_named_window(self) -> t.Optional[exp.Expression]: 4578 return self._parse_window(self._parse_id_var(), alias=True) 4579 4580 def _parse_respect_or_ignore_nulls( 4581 self, this: t.Optional[exp.Expression] 4582 ) -> t.Optional[exp.Expression]: 4583 if self._match_text_seq("IGNORE", "NULLS"): 4584 return self.expression(exp.IgnoreNulls, this=this) 4585 if self._match_text_seq("RESPECT", "NULLS"): 4586 return self.expression(exp.RespectNulls, this=this) 4587 return this 4588 4589 def _parse_window( 4590 self, this: t.Optional[exp.Expression], alias: bool = False 4591 ) -> t.Optional[exp.Expression]: 4592 if self._match_pair(TokenType.FILTER, TokenType.L_PAREN): 4593 self._match(TokenType.WHERE) 4594 this = self.expression( 4595 exp.Filter, this=this, expression=self._parse_where(skip_where_token=True) 4596 ) 4597 self._match_r_paren() 4598 4599 # T-SQL allows the OVER (...) syntax after WITHIN GROUP. 4600 # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16 4601 if self._match_text_seq("WITHIN", "GROUP"): 4602 order = self._parse_wrapped(self._parse_order) 4603 this = self.expression(exp.WithinGroup, this=this, expression=order) 4604 4605 # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER 4606 # Some dialects choose to implement and some do not. 4607 # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html 4608 4609 # There is some code above in _parse_lambda that handles 4610 # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ... 4611 4612 # The below changes handle 4613 # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ... 4614 4615 # Oracle allows both formats 4616 # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html) 4617 # and Snowflake chose to do the same for familiarity 4618 # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes 4619 this = self._parse_respect_or_ignore_nulls(this) 4620 4621 # bigquery select from window x AS (partition by ...) 
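# --- Illustrative examples (not part of the sqlglot source) for the two
# paths handled here, via the public API; exact behavior may vary by version:
#
#   >>> import sqlglot
#   >>> # IGNORE NULLS after the closing paren is normalized by
#   >>> # _parse_respect_or_ignore_nulls above
#   >>> sqlglot.parse_one("SELECT FIRST_VALUE(x) IGNORE NULLS OVER (ORDER BY y) FROM t")
#   >>> # a named window re-enters _parse_window with alias=True
#   >>> # via _parse_named_window
#   >>> sqlglot.parse_one("SELECT FIRST_VALUE(x) OVER w FROM t WINDOW w AS (ORDER BY y)", read="bigquery")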
4622 if alias: 4623 over = None 4624 self._match(TokenType.ALIAS) 4625 elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS): 4626 return this 4627 else: 4628 over = self._prev.text.upper() 4629 4630 if not self._match(TokenType.L_PAREN): 4631 return self.expression( 4632 exp.Window, this=this, alias=self._parse_id_var(False), over=over 4633 ) 4634 4635 window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS) 4636 4637 first = self._match(TokenType.FIRST) 4638 if self._match_text_seq("LAST"): 4639 first = False 4640 4641 partition, order = self._parse_partition_and_order() 4642 kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text 4643 4644 if kind: 4645 self._match(TokenType.BETWEEN) 4646 start = self._parse_window_spec() 4647 self._match(TokenType.AND) 4648 end = self._parse_window_spec() 4649 4650 spec = self.expression( 4651 exp.WindowSpec, 4652 kind=kind, 4653 start=start["value"], 4654 start_side=start["side"], 4655 end=end["value"], 4656 end_side=end["side"], 4657 ) 4658 else: 4659 spec = None 4660 4661 self._match_r_paren() 4662 4663 window = self.expression( 4664 exp.Window, 4665 this=this, 4666 partition_by=partition, 4667 order=order, 4668 spec=spec, 4669 alias=window_alias, 4670 over=over, 4671 first=first, 4672 ) 4673 4674 # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...) 4675 if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False): 4676 return self._parse_window(window, alias=alias) 4677 4678 return window 4679 4680 def _parse_partition_and_order( 4681 self, 4682 ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]: 4683 return self._parse_partition_by(), self._parse_order() 4684 4685 def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]: 4686 self._match(TokenType.BETWEEN) 4687 4688 return { 4689 "value": ( 4690 (self._match_text_seq("UNBOUNDED") and "UNBOUNDED") 4691 or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW") 4692 or self._parse_bitwise() 4693 ), 4694 "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text, 4695 } 4696 4697 def _parse_alias( 4698 self, this: t.Optional[exp.Expression], explicit: bool = False 4699 ) -> t.Optional[exp.Expression]: 4700 any_token = self._match(TokenType.ALIAS) 4701 4702 if explicit and not any_token: 4703 return this 4704 4705 if self._match(TokenType.L_PAREN): 4706 aliases = self.expression( 4707 exp.Aliases, 4708 this=this, 4709 expressions=self._parse_csv(lambda: self._parse_id_var(any_token)), 4710 ) 4711 self._match_r_paren(aliases) 4712 return aliases 4713 4714 alias = self._parse_id_var(any_token) 4715 4716 if alias: 4717 return self.expression(exp.Alias, this=this, alias=alias) 4718 4719 return this 4720 4721 def _parse_id_var( 4722 self, 4723 any_token: bool = True, 4724 tokens: t.Optional[t.Collection[TokenType]] = None, 4725 ) -> t.Optional[exp.Expression]: 4726 identifier = self._parse_identifier() 4727 4728 if identifier: 4729 return identifier 4730 4731 if (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS): 4732 quoted = self._prev.token_type == TokenType.STRING 4733 return exp.Identifier(this=self._prev.text, quoted=quoted) 4734 4735 return None 4736 4737 def _parse_string(self) -> t.Optional[exp.Expression]: 4738 if self._match(TokenType.STRING): 4739 return self.PRIMARY_PARSERS[TokenType.STRING](self, self._prev) 4740 return self._parse_placeholder() 4741 4742 def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]: 4743 return 
exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True) 4744 4745 def _parse_number(self) -> t.Optional[exp.Expression]: 4746 if self._match(TokenType.NUMBER): 4747 return self.PRIMARY_PARSERS[TokenType.NUMBER](self, self._prev) 4748 return self._parse_placeholder() 4749 4750 def _parse_identifier(self) -> t.Optional[exp.Expression]: 4751 if self._match(TokenType.IDENTIFIER): 4752 return self.expression(exp.Identifier, this=self._prev.text, quoted=True) 4753 return self._parse_placeholder() 4754 4755 def _parse_var( 4756 self, any_token: bool = False, tokens: t.Optional[t.Collection[TokenType]] = None 4757 ) -> t.Optional[exp.Expression]: 4758 if ( 4759 (any_token and self._advance_any()) 4760 or self._match(TokenType.VAR) 4761 or (self._match_set(tokens) if tokens else False) 4762 ): 4763 return self.expression(exp.Var, this=self._prev.text) 4764 return self._parse_placeholder() 4765 4766 def _advance_any(self) -> t.Optional[Token]: 4767 if self._curr and self._curr.token_type not in self.RESERVED_KEYWORDS: 4768 self._advance() 4769 return self._prev 4770 return None 4771 4772 def _parse_var_or_string(self) -> t.Optional[exp.Expression]: 4773 return self._parse_var() or self._parse_string() 4774 4775 def _parse_null(self) -> t.Optional[exp.Expression]: 4776 if self._match_set(self.NULL_TOKENS): 4777 return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev) 4778 return self._parse_placeholder() 4779 4780 def _parse_boolean(self) -> t.Optional[exp.Expression]: 4781 if self._match(TokenType.TRUE): 4782 return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev) 4783 if self._match(TokenType.FALSE): 4784 return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev) 4785 return self._parse_placeholder() 4786 4787 def _parse_star(self) -> t.Optional[exp.Expression]: 4788 if self._match(TokenType.STAR): 4789 return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev) 4790 return self._parse_placeholder() 4791 4792 def _parse_parameter(self) -> exp.Parameter: 4793 wrapped = self._match(TokenType.L_BRACE) 4794 this = self._parse_var() or self._parse_identifier() or self._parse_primary() 4795 self._match(TokenType.R_BRACE) 4796 return self.expression(exp.Parameter, this=this, wrapped=wrapped) 4797 4798 def _parse_placeholder(self) -> t.Optional[exp.Expression]: 4799 if self._match_set(self.PLACEHOLDER_PARSERS): 4800 placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self) 4801 if placeholder: 4802 return placeholder 4803 self._advance(-1) 4804 return None 4805 4806 def _parse_except(self) -> t.Optional[t.List[exp.Expression]]: 4807 if not self._match(TokenType.EXCEPT): 4808 return None 4809 if self._match(TokenType.L_PAREN, advance=False): 4810 return self._parse_wrapped_csv(self._parse_column) 4811 4812 except_column = self._parse_column() 4813 return [except_column] if except_column else None 4814 4815 def _parse_replace(self) -> t.Optional[t.List[exp.Expression]]: 4816 if not self._match(TokenType.REPLACE): 4817 return None 4818 if self._match(TokenType.L_PAREN, advance=False): 4819 return self._parse_wrapped_csv(self._parse_expression) 4820 4821 replace_expression = self._parse_expression() 4822 return [replace_expression] if replace_expression else None 4823 4824 def _parse_csv( 4825 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA 4826 ) -> t.List[exp.Expression]: 4827 parse_result = parse_method() 4828 items = [parse_result] if parse_result is not None else [] 4829 4830 while self._match(sep): 4831 self._add_comments(parse_result) 
4832 parse_result = parse_method() 4833 if parse_result is not None: 4834 items.append(parse_result) 4835 4836 return items 4837 4838 def _parse_tokens( 4839 self, parse_method: t.Callable, expressions: t.Dict 4840 ) -> t.Optional[exp.Expression]: 4841 this = parse_method() 4842 4843 while self._match_set(expressions): 4844 this = self.expression( 4845 expressions[self._prev.token_type], 4846 this=this, 4847 comments=self._prev_comments, 4848 expression=parse_method(), 4849 ) 4850 4851 return this 4852 4853 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]: 4854 return self._parse_wrapped_csv(self._parse_id_var, optional=optional) 4855 4856 def _parse_wrapped_csv( 4857 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False 4858 ) -> t.List[exp.Expression]: 4859 return self._parse_wrapped( 4860 lambda: self._parse_csv(parse_method, sep=sep), optional=optional 4861 ) 4862 4863 def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any: 4864 wrapped = self._match(TokenType.L_PAREN) 4865 if not wrapped and not optional: 4866 self.raise_error("Expecting (") 4867 parse_result = parse_method() 4868 if wrapped: 4869 self._match_r_paren() 4870 return parse_result 4871 4872 def _parse_expressions(self) -> t.List[exp.Expression]: 4873 return self._parse_csv(self._parse_expression) 4874 4875 def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]: 4876 return self._parse_select() or self._parse_set_operations( 4877 self._parse_expression() if alias else self._parse_conjunction() 4878 ) 4879 4880 def _parse_ddl_select(self) -> t.Optional[exp.Expression]: 4881 return self._parse_query_modifiers( 4882 self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False)) 4883 ) 4884 4885 def _parse_transaction(self) -> exp.Transaction | exp.Command: 4886 this = None 4887 if self._match_texts(self.TRANSACTION_KIND): 4888 this = self._prev.text 4889 4890 self._match_texts({"TRANSACTION", "WORK"}) 4891 4892 modes = [] 4893 while True: 4894 mode = [] 4895 while self._match(TokenType.VAR): 4896 mode.append(self._prev.text) 4897 4898 if mode: 4899 modes.append(" ".join(mode)) 4900 if not self._match(TokenType.COMMA): 4901 break 4902 4903 return self.expression(exp.Transaction, this=this, modes=modes) 4904 4905 def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback: 4906 chain = None 4907 savepoint = None 4908 is_rollback = self._prev.token_type == TokenType.ROLLBACK 4909 4910 self._match_texts({"TRANSACTION", "WORK"}) 4911 4912 if self._match_text_seq("TO"): 4913 self._match_text_seq("SAVEPOINT") 4914 savepoint = self._parse_id_var() 4915 4916 if self._match(TokenType.AND): 4917 chain = not self._match_text_seq("NO") 4918 self._match_text_seq("CHAIN") 4919 4920 if is_rollback: 4921 return self.expression(exp.Rollback, savepoint=savepoint) 4922 4923 return self.expression(exp.Commit, chain=chain) 4924 4925 def _parse_add_column(self) -> t.Optional[exp.Expression]: 4926 if not self._match_text_seq("ADD"): 4927 return None 4928 4929 self._match(TokenType.COLUMN) 4930 exists_column = self._parse_exists(not_=True) 4931 expression = self._parse_field_def() 4932 4933 if expression: 4934 expression.set("exists", exists_column) 4935 4936 # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns 4937 if self._match_texts(("FIRST", "AFTER")): 4938 position = self._prev.text 4939 column_position = self.expression( 4940 
exp.ColumnPosition, this=self._parse_column(), position=position 4941 ) 4942 expression.set("position", column_position) 4943 4944 return expression 4945 4946 def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]: 4947 drop = self._match(TokenType.DROP) and self._parse_drop() 4948 if drop and not isinstance(drop, exp.Command): 4949 drop.set("kind", drop.args.get("kind", "COLUMN")) 4950 return drop 4951 4952 # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html 4953 def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition: 4954 return self.expression( 4955 exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists 4956 ) 4957 4958 def _parse_add_constraint(self) -> exp.AddConstraint: 4959 this = None 4960 kind = self._prev.token_type 4961 4962 if kind == TokenType.CONSTRAINT: 4963 this = self._parse_id_var() 4964 4965 if self._match_text_seq("CHECK"): 4966 expression = self._parse_wrapped(self._parse_conjunction) 4967 enforced = self._match_text_seq("ENFORCED") 4968 4969 return self.expression( 4970 exp.AddConstraint, this=this, expression=expression, enforced=enforced 4971 ) 4972 4973 if kind == TokenType.FOREIGN_KEY or self._match(TokenType.FOREIGN_KEY): 4974 expression = self._parse_foreign_key() 4975 elif kind == TokenType.PRIMARY_KEY or self._match(TokenType.PRIMARY_KEY): 4976 expression = self._parse_primary_key() 4977 else: 4978 expression = None 4979 4980 return self.expression(exp.AddConstraint, this=this, expression=expression) 4981 4982 def _parse_alter_table_add(self) -> t.List[exp.Expression]: 4983 index = self._index - 1 4984 4985 if self._match_set(self.ADD_CONSTRAINT_TOKENS): 4986 return self._parse_csv(self._parse_add_constraint) 4987 4988 self._retreat(index) 4989 if not self.ALTER_TABLE_ADD_COLUMN_KEYWORD and self._match_text_seq("ADD"): 4990 return self._parse_csv(self._parse_field_def) 4991 4992 return self._parse_csv(self._parse_add_column) 4993 4994 def _parse_alter_table_alter(self) -> exp.AlterColumn: 4995 self._match(TokenType.COLUMN) 4996 column = self._parse_field(any_token=True) 4997 4998 if self._match_pair(TokenType.DROP, TokenType.DEFAULT): 4999 return self.expression(exp.AlterColumn, this=column, drop=True) 5000 if self._match_pair(TokenType.SET, TokenType.DEFAULT): 5001 return self.expression(exp.AlterColumn, this=column, default=self._parse_conjunction()) 5002 5003 self._match_text_seq("SET", "DATA") 5004 return self.expression( 5005 exp.AlterColumn, 5006 this=column, 5007 dtype=self._match_text_seq("TYPE") and self._parse_types(), 5008 collate=self._match(TokenType.COLLATE) and self._parse_term(), 5009 using=self._match(TokenType.USING) and self._parse_conjunction(), 5010 ) 5011 5012 def _parse_alter_table_drop(self) -> t.List[exp.Expression]: 5013 index = self._index - 1 5014 5015 partition_exists = self._parse_exists() 5016 if self._match(TokenType.PARTITION, advance=False): 5017 return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists)) 5018 5019 self._retreat(index) 5020 return self._parse_csv(self._parse_drop_column) 5021 5022 def _parse_alter_table_rename(self) -> exp.RenameTable: 5023 self._match_text_seq("TO") 5024 return self.expression(exp.RenameTable, this=self._parse_table(schema=True)) 5025 5026 def _parse_alter(self) -> exp.AlterTable | exp.Command: 5027 start = self._prev 5028 5029 if not self._match(TokenType.TABLE): 5030 return self._parse_as_command(start) 5031 5032 exists = self._parse_exists() 5033 only = 
self._match_text_seq("ONLY") 5034 this = self._parse_table(schema=True) 5035 5036 if self._next: 5037 self._advance() 5038 5039 parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None 5040 if parser: 5041 actions = ensure_list(parser(self)) 5042 5043 if not self._curr: 5044 return self.expression( 5045 exp.AlterTable, 5046 this=this, 5047 exists=exists, 5048 actions=actions, 5049 only=only, 5050 ) 5051 5052 return self._parse_as_command(start) 5053 5054 def _parse_merge(self) -> exp.Merge: 5055 self._match(TokenType.INTO) 5056 target = self._parse_table() 5057 5058 if target and self._match(TokenType.ALIAS, advance=False): 5059 target.set("alias", self._parse_table_alias()) 5060 5061 self._match(TokenType.USING) 5062 using = self._parse_table() 5063 5064 self._match(TokenType.ON) 5065 on = self._parse_conjunction() 5066 5067 return self.expression( 5068 exp.Merge, 5069 this=target, 5070 using=using, 5071 on=on, 5072 expressions=self._parse_when_matched(), 5073 ) 5074 5075 def _parse_when_matched(self) -> t.List[exp.When]: 5076 whens = [] 5077 5078 while self._match(TokenType.WHEN): 5079 matched = not self._match(TokenType.NOT) 5080 self._match_text_seq("MATCHED") 5081 source = ( 5082 False 5083 if self._match_text_seq("BY", "TARGET") 5084 else self._match_text_seq("BY", "SOURCE") 5085 ) 5086 condition = self._parse_conjunction() if self._match(TokenType.AND) else None 5087 5088 self._match(TokenType.THEN) 5089 5090 if self._match(TokenType.INSERT): 5091 _this = self._parse_star() 5092 if _this: 5093 then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=_this) 5094 else: 5095 then = self.expression( 5096 exp.Insert, 5097 this=self._parse_value(), 5098 expression=self._match(TokenType.VALUES) and self._parse_value(), 5099 ) 5100 elif self._match(TokenType.UPDATE): 5101 expressions = self._parse_star() 5102 if expressions: 5103 then = self.expression(exp.Update, expressions=expressions) 5104 else: 5105 then = self.expression( 5106 exp.Update, 5107 expressions=self._match(TokenType.SET) 5108 and self._parse_csv(self._parse_equality), 5109 ) 5110 elif self._match(TokenType.DELETE): 5111 then = self.expression(exp.Var, this=self._prev.text) 5112 else: 5113 then = None 5114 5115 whens.append( 5116 self.expression( 5117 exp.When, 5118 matched=matched, 5119 source=source, 5120 condition=condition, 5121 then=then, 5122 ) 5123 ) 5124 return whens 5125 5126 def _parse_show(self) -> t.Optional[exp.Expression]: 5127 parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE) 5128 if parser: 5129 return parser(self) 5130 return self._parse_as_command(self._prev) 5131 5132 def _parse_set_item_assignment( 5133 self, kind: t.Optional[str] = None 5134 ) -> t.Optional[exp.Expression]: 5135 index = self._index 5136 5137 if kind in {"GLOBAL", "SESSION"} and self._match_text_seq("TRANSACTION"): 5138 return self._parse_set_transaction(global_=kind == "GLOBAL") 5139 5140 left = self._parse_primary() or self._parse_id_var() 5141 assignment_delimiter = self._match_texts(("=", "TO")) 5142 5143 if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter): 5144 self._retreat(index) 5145 return None 5146 5147 right = self._parse_statement() or self._parse_id_var() 5148 this = self.expression(exp.EQ, this=left, expression=right) 5149 5150 return self.expression(exp.SetItem, this=this, kind=kind) 5151 5152 def _parse_set_transaction(self, global_: bool = False) -> exp.Expression: 5153 self._match_text_seq("TRANSACTION") 5154 characteristics = 
self._parse_csv( 5155 lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS) 5156 ) 5157 return self.expression( 5158 exp.SetItem, 5159 expressions=characteristics, 5160 kind="TRANSACTION", 5161 **{"global": global_}, # type: ignore 5162 ) 5163 5164 def _parse_set_item(self) -> t.Optional[exp.Expression]: 5165 parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE) 5166 return parser(self) if parser else self._parse_set_item_assignment(kind=None) 5167 5168 def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command: 5169 index = self._index 5170 set_ = self.expression( 5171 exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag 5172 ) 5173 5174 if self._curr: 5175 self._retreat(index) 5176 return self._parse_as_command(self._prev) 5177 5178 return set_ 5179 5180 def _parse_var_from_options(self, options: t.Collection[str]) -> t.Optional[exp.Var]: 5181 for option in options: 5182 if self._match_text_seq(*option.split(" ")): 5183 return exp.var(option) 5184 return None 5185 5186 def _parse_as_command(self, start: Token) -> exp.Command: 5187 while self._curr: 5188 self._advance() 5189 text = self._find_sql(start, self._prev) 5190 size = len(start.text) 5191 return exp.Command(this=text[:size], expression=text[size:]) 5192 5193 def _parse_dict_property(self, this: str) -> exp.DictProperty: 5194 settings = [] 5195 5196 self._match_l_paren() 5197 kind = self._parse_id_var() 5198 5199 if self._match(TokenType.L_PAREN): 5200 while True: 5201 key = self._parse_id_var() 5202 value = self._parse_primary() 5203 5204 if not key and value is None: 5205 break 5206 settings.append(self.expression(exp.DictSubProperty, this=key, value=value)) 5207 self._match(TokenType.R_PAREN) 5208 5209 self._match_r_paren() 5210 5211 return self.expression( 5212 exp.DictProperty, 5213 this=this, 5214 kind=kind.this if kind else None, 5215 settings=settings, 5216 ) 5217 5218 def _parse_dict_range(self, this: str) -> exp.DictRange: 5219 self._match_l_paren() 5220 has_min = self._match_text_seq("MIN") 5221 if has_min: 5222 min = self._parse_var() or self._parse_primary() 5223 self._match_text_seq("MAX") 5224 max = self._parse_var() or self._parse_primary() 5225 else: 5226 max = self._parse_var() or self._parse_primary() 5227 min = exp.Literal.number(0) 5228 self._match_r_paren() 5229 return self.expression(exp.DictRange, this=this, min=min, max=max) 5230 5231 def _parse_comprehension(self, this: exp.Expression) -> t.Optional[exp.Comprehension]: 5232 index = self._index 5233 expression = self._parse_column() 5234 if not self._match(TokenType.IN): 5235 self._retreat(index - 1) 5236 return None 5237 iterator = self._parse_column() 5238 condition = self._parse_conjunction() if self._match_text_seq("IF") else None 5239 return self.expression( 5240 exp.Comprehension, 5241 this=this, 5242 expression=expression, 5243 iterator=iterator, 5244 condition=condition, 5245 ) 5246 5247 def _find_parser( 5248 self, parsers: t.Dict[str, t.Callable], trie: t.Dict 5249 ) -> t.Optional[t.Callable]: 5250 if not self._curr: 5251 return None 5252 5253 index = self._index 5254 this = [] 5255 while True: 5256 # The current token might be multiple words 5257 curr = self._curr.text.upper() 5258 key = curr.split(" ") 5259 this.append(curr) 5260 5261 self._advance() 5262 result, trie = in_trie(trie, key) 5263 if result == TrieResult.FAILED: 5264 break 5265 5266 if result == TrieResult.EXISTS: 5267 subparser = parsers[" ".join(this)] 5268 return subparser 5269 5270 
self._retreat(index) 5271 return None 5272 5273 def _match(self, token_type, advance=True, expression=None): 5274 if not self._curr: 5275 return None 5276 5277 if self._curr.token_type == token_type: 5278 if advance: 5279 self._advance() 5280 self._add_comments(expression) 5281 return True 5282 5283 return None 5284 5285 def _match_set(self, types, advance=True): 5286 if not self._curr: 5287 return None 5288 5289 if self._curr.token_type in types: 5290 if advance: 5291 self._advance() 5292 return True 5293 5294 return None 5295 5296 def _match_pair(self, token_type_a, token_type_b, advance=True): 5297 if not self._curr or not self._next: 5298 return None 5299 5300 if self._curr.token_type == token_type_a and self._next.token_type == token_type_b: 5301 if advance: 5302 self._advance(2) 5303 return True 5304 5305 return None 5306 5307 def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 5308 if not self._match(TokenType.L_PAREN, expression=expression): 5309 self.raise_error("Expecting (") 5310 5311 def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 5312 if not self._match(TokenType.R_PAREN, expression=expression): 5313 self.raise_error("Expecting )") 5314 5315 def _match_texts(self, texts, advance=True): 5316 if self._curr and self._curr.text.upper() in texts: 5317 if advance: 5318 self._advance() 5319 return True 5320 return False 5321 5322 def _match_text_seq(self, *texts, advance=True): 5323 index = self._index 5324 for text in texts: 5325 if self._curr and self._curr.text.upper() == text: 5326 self._advance() 5327 else: 5328 self._retreat(index) 5329 return False 5330 5331 if not advance: 5332 self._retreat(index) 5333 5334 return True 5335 5336 @t.overload 5337 def _replace_columns_with_dots(self, this: exp.Expression) -> exp.Expression: 5338 ... 5339 5340 @t.overload 5341 def _replace_columns_with_dots( 5342 self, this: t.Optional[exp.Expression] 5343 ) -> t.Optional[exp.Expression]: 5344 ... 5345 5346 def _replace_columns_with_dots(self, this): 5347 if isinstance(this, exp.Dot): 5348 exp.replace_children(this, self._replace_columns_with_dots) 5349 elif isinstance(this, exp.Column): 5350 exp.replace_children(this, self._replace_columns_with_dots) 5351 table = this.args.get("table") 5352 this = ( 5353 self.expression(exp.Dot, this=table, expression=this.this) if table else this.this 5354 ) 5355 5356 return this 5357 5358 def _replace_lambda( 5359 self, node: t.Optional[exp.Expression], lambda_variables: t.Set[str] 5360 ) -> t.Optional[exp.Expression]: 5361 if not node: 5362 return node 5363 5364 for column in node.find_all(exp.Column): 5365 if column.parts[0].name in lambda_variables: 5366 dot_or_id = column.to_dot() if column.table else column.this 5367 parent = column.parent 5368 5369 while isinstance(parent, exp.Dot): 5370 if not isinstance(parent.parent, exp.Dot): 5371 parent.replace(dot_or_id) 5372 break 5373 parent = parent.parent 5374 else: 5375 if column is node: 5376 node = dot_or_id 5377 else: 5378 column.replace(dot_or_id) 5379 return node 5380 5381 def _ensure_string_if_null(self, values: t.List[exp.Expression]) -> t.List[exp.Expression]: 5382 return [ 5383 exp.func("COALESCE", exp.cast(value, "text"), exp.Literal.string("")) 5384 for value in values 5385 if value 5386 ]
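To see how _parse_merge and _parse_when_matched compose in practice, here is a minimal sketch using the public sqlglot.parse_one entry point (which routes through this parser); the exact node layout may differ slightly across sqlglot versions:

import sqlglot
from sqlglot import exp

ast = sqlglot.parse_one(
    "MERGE INTO target t USING source s ON t.id = s.id "
    "WHEN MATCHED THEN UPDATE SET t.v = s.v "
    "WHEN NOT MATCHED THEN INSERT (id, v) VALUES (s.id, s.v)"
)
assert isinstance(ast, exp.Merge)

# Each WHEN branch becomes an exp.When node built by _parse_when_matched,
# carrying the matched/source flags and the INSERT/UPDATE/DELETE action.
for when in ast.expressions:
    print(when.args.get("matched"), when.args.get("then"))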
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: Determines the amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
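As a rough illustration of these settings, here is a minimal sketch of driving the parser directly (most callers go through sqlglot.parse or a Dialect instead):

from sqlglot import Parser, Tokenizer
from sqlglot.errors import ErrorLevel

sql = "SELECT a FROM t"
tokens = Tokenizer().tokenize(sql)

# Accumulate errors and raise them together (showing up to max_errors
# messages), rather than raising immediately on the first problem.
parser = Parser(error_level=ErrorLevel.RAISE, max_errors=5)
expressions = parser.parse(tokens, sql=sql)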
    def __init__(
        self,
        error_level: t.Optional[ErrorLevel] = None,
        error_message_context: int = 100,
        max_errors: int = 3,
    ):
        self.error_level = error_level or ErrorLevel.IMMEDIATE
        self.error_message_context = error_message_context
        self.max_errors = max_errors
        self._tokenizer = self.TOKENIZER_CLASS()
        self.reset()
    def parse(
        self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens and returns a list of syntax trees, one tree
        per parsed SQL statement.

        Args:
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The list of the produced syntax trees.
        """
        return self._parse(
            parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
        )
Parses a list of tokens and returns a list of syntax trees, one tree per parsed SQL statement.
Arguments:
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The list of the produced syntax trees.
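A short usage sketch: because parsing is per statement, a two-statement string produces a two-element list of trees.

from sqlglot import Parser, Tokenizer

sql = "SELECT 1; SELECT 2"
trees = Parser().parse(Tokenizer().tokenize(sql), sql=sql)
assert len(trees) == 2  # one syntax tree per statement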
    def parse_into(
        self,
        expression_types: exp.IntoType,
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens into a given Expression type. If a collection of Expression
        types is given instead, this method will try to parse the token list into each one
        of them, stopping at the first for which the parsing succeeds.

        Args:
            expression_types: The expression type(s) to try and parse the token list into.
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The target Expression.
        """
        errors = []
        for expression_type in ensure_list(expression_types):
            parser = self.EXPRESSION_PARSERS.get(expression_type)
            if not parser:
                raise TypeError(f"No parser registered for {expression_type}")

            try:
                return self._parse(parser, raw_tokens, sql)
            except ParseError as e:
                e.errors[0]["into_expression"] = expression_type
                errors.append(e)

        raise ParseError(
            f"Failed to parse '{sql or raw_tokens}' into {expression_types}",
            errors=merge_errors(errors),
        ) from errors[-1]
Parses a list of tokens into a given Expression type. If a collection of Expression types is given instead, this method will try to parse the token list into each one of them, stopping at the first for which the parsing succeeds.
Arguments:
- expression_types: The expression type(s) to try and parse the token list into.
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The target Expression.
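A sketch of parsing directly into a target type, assuming exp.Select is among the registered EXPRESSION_PARSERS keys (it is in current sqlglot releases):

from sqlglot import Parser, Tokenizer, exp

sql = "SELECT x FROM t"
select = Parser().parse_into(exp.Select, Tokenizer().tokenize(sql), sql)[0]
assert isinstance(select, exp.Select)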
    def check_errors(self) -> None:
        """Logs or raises any found errors, depending on the chosen error level setting."""
        if self.error_level == ErrorLevel.WARN:
            for error in self.errors:
                logger.error(str(error))
        elif self.error_level == ErrorLevel.RAISE and self.errors:
            raise ParseError(
                concat_messages(self.errors, self.max_errors),
                errors=merge_errors(self.errors),
            )
Logs or raises any found errors, depending on the chosen error level setting.
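Under ErrorLevel.WARN each recorded error is logged and parsing continues; under ErrorLevel.RAISE the errors are bundled into one ParseError whose message is capped at max_errors entries. A hedged sketch:

from sqlglot import Parser, Tokenizer
from sqlglot.errors import ErrorLevel, ParseError

parser = Parser(error_level=ErrorLevel.RAISE)
try:
    # check_errors runs after each statement is parsed, so the incomplete
    # FROM clause surfaces as a single aggregated ParseError here.
    parser.parse(Tokenizer().tokenize("SELECT * FROM"), sql="SELECT * FROM")
except ParseError as e:
    print(e.errors)  # the structured errors merged by merge_errors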
    def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
        """
        Appends an error in the list of recorded errors or raises it, depending on the chosen
        error level setting.
        """
        token = token or self._curr or self._prev or Token.string("")
        start = token.start
        end = token.end + 1
        start_context = self.sql[max(start - self.error_message_context, 0) : start]
        highlight = self.sql[start:end]
        end_context = self.sql[end : end + self.error_message_context]

        error = ParseError.new(
            f"{message}. Line {token.line}, Col: {token.col}.\n"
            f"  {start_context}\033[4m{highlight}\033[0m{end_context}",
            description=message,
            line=token.line,
            col=token.col,
            start_context=start_context,
            highlight=highlight,
            end_context=end_context,
        )

        if self.error_level == ErrorLevel.IMMEDIATE:
            raise error

        self.errors.append(error)
Appends an error to the list of recorded errors or raises it, depending on the chosen error level setting.
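With the default ErrorLevel.IMMEDIATE, raise_error raises as soon as it is called; under RAISE or WARN it only appends to self.errors for check_errors to handle later. A small sketch of the default behavior:

from sqlglot import Parser, Tokenizer
from sqlglot.errors import ParseError

try:
    Parser().parse(Tokenizer().tokenize("SELECT * FROM"), sql="SELECT * FROM")
except ParseError as e:
    # The message embeds line/column information and up to
    # error_message_context characters of SQL on either side,
    # with the offending token underlined via ANSI escapes.
    print(e)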
    def expression(
        self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs
    ) -> E:
        """
        Creates a new, validated Expression.

        Args:
            exp_class: The expression class to instantiate.
            comments: An optional list of comments to attach to the expression.
            kwargs: The arguments to set for the expression along with their respective values.

        Returns:
            The target expression.
        """
        instance = exp_class(**kwargs)
        instance.add_comments(comments) if comments else self._add_comments(instance)
        return self.validate_expression(instance)
Creates a new, validated Expression.
Arguments:
- exp_class: The expression class to instantiate.
- comments: An optional list of comments to attach to the expression.
- kwargs: The arguments to set for the expression along with their respective values.
Returns:
The target expression.
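Dialect parser methods build nodes through this helper so that pending comments are attached and validation runs. A minimal sketch, using the public exp.column and exp.Literal helpers:

from sqlglot import Parser, exp

parser = Parser()
eq = parser.expression(exp.EQ, this=exp.column("a"), expression=exp.Literal.number(1))
print(eq.sql())  # a = 1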
    def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E:
        """
        Validates an Expression, making sure that all its mandatory arguments are set.

        Args:
            expression: The expression to validate.
            args: An optional list of items that was used to instantiate the expression, if it's a Func.

        Returns:
            The validated expression.
        """
        if self.error_level != ErrorLevel.IGNORE:
            for error_message in expression.error_messages(args):
                self.raise_error(error_message)

        return expression
Validates an Expression, making sure that all its mandatory arguments are set.
Arguments:
- expression: The expression to validate.
- args: An optional list of items that was used to instantiate the expression, if it's a Func.
Returns:
The validated expression.
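Note that validation is skipped entirely under ErrorLevel.IGNORE, which can be useful when deliberately building partial trees. A sketch:

from sqlglot import Parser, exp
from sqlglot.errors import ErrorLevel, ParseError

incomplete = exp.EQ(this=exp.column("a"))  # mandatory 'expression' arg missing

# IGNORE: returned unchanged, nothing is recorded.
Parser(error_level=ErrorLevel.IGNORE).validate_expression(incomplete)

# IMMEDIATE (the default): raise_error fires for the missing argument.
try:
    Parser().validate_expression(incomplete)
except ParseError as e:
    print(e)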