# sqlglot.parser
1from __future__ import annotations 2 3import logging 4import typing as t 5from collections import defaultdict 6 7from sqlglot import exp 8from sqlglot.errors import ErrorLevel, ParseError, concat_messages, merge_errors 9from sqlglot.helper import apply_index_offset, ensure_list, seq_get 10from sqlglot.time import format_time 11from sqlglot.tokens import Token, Tokenizer, TokenType 12from sqlglot.trie import TrieResult, in_trie, new_trie 13 14if t.TYPE_CHECKING: 15 from sqlglot._typing import E 16 17logger = logging.getLogger("sqlglot") 18 19 20def parse_var_map(args: t.List) -> exp.StarMap | exp.VarMap: 21 if len(args) == 1 and args[0].is_star: 22 return exp.StarMap(this=args[0]) 23 24 keys = [] 25 values = [] 26 for i in range(0, len(args), 2): 27 keys.append(args[i]) 28 values.append(args[i + 1]) 29 30 return exp.VarMap( 31 keys=exp.Array(expressions=keys), 32 values=exp.Array(expressions=values), 33 ) 34 35 36def parse_like(args: t.List) -> exp.Escape | exp.Like: 37 like = exp.Like(this=seq_get(args, 1), expression=seq_get(args, 0)) 38 return exp.Escape(this=like, expression=seq_get(args, 2)) if len(args) > 2 else like 39 40 41def binary_range_parser( 42 expr_type: t.Type[exp.Expression], 43) -> t.Callable[[Parser, t.Optional[exp.Expression]], t.Optional[exp.Expression]]: 44 return lambda self, this: self._parse_escape( 45 self.expression(expr_type, this=this, expression=self._parse_bitwise()) 46 ) 47 48 49class _Parser(type): 50 def __new__(cls, clsname, bases, attrs): 51 klass = super().__new__(cls, clsname, bases, attrs) 52 53 klass.SHOW_TRIE = new_trie(key.split(" ") for key in klass.SHOW_PARSERS) 54 klass.SET_TRIE = new_trie(key.split(" ") for key in klass.SET_PARSERS) 55 56 return klass 57 58 59class Parser(metaclass=_Parser): 60 """ 61 Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree. 62 63 Args: 64 error_level: The desired error level. 
65 Default: ErrorLevel.IMMEDIATE 66 error_message_context: Determines the amount of context to capture from a 67 query string when displaying the error message (in number of characters). 68 Default: 100 69 max_errors: Maximum number of error messages to include in a raised ParseError. 70 This is only relevant if error_level is ErrorLevel.RAISE. 71 Default: 3 72 """ 73 74 FUNCTIONS: t.Dict[str, t.Callable] = { 75 **{name: f.from_arg_list for f in exp.ALL_FUNCTIONS for name in f.sql_names()}, 76 "DATE_TO_DATE_STR": lambda args: exp.Cast( 77 this=seq_get(args, 0), 78 to=exp.DataType(this=exp.DataType.Type.TEXT), 79 ), 80 "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)), 81 "LIKE": parse_like, 82 "TIME_TO_TIME_STR": lambda args: exp.Cast( 83 this=seq_get(args, 0), 84 to=exp.DataType(this=exp.DataType.Type.TEXT), 85 ), 86 "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring( 87 this=exp.Cast( 88 this=seq_get(args, 0), 89 to=exp.DataType(this=exp.DataType.Type.TEXT), 90 ), 91 start=exp.Literal.number(1), 92 length=exp.Literal.number(10), 93 ), 94 "VAR_MAP": parse_var_map, 95 } 96 97 NO_PAREN_FUNCTIONS = { 98 TokenType.CURRENT_DATE: exp.CurrentDate, 99 TokenType.CURRENT_DATETIME: exp.CurrentDate, 100 TokenType.CURRENT_TIME: exp.CurrentTime, 101 TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp, 102 TokenType.CURRENT_USER: exp.CurrentUser, 103 } 104 105 STRUCT_TYPE_TOKENS = { 106 TokenType.NESTED, 107 TokenType.STRUCT, 108 } 109 110 NESTED_TYPE_TOKENS = { 111 TokenType.ARRAY, 112 TokenType.LOWCARDINALITY, 113 TokenType.MAP, 114 TokenType.NULLABLE, 115 *STRUCT_TYPE_TOKENS, 116 } 117 118 ENUM_TYPE_TOKENS = { 119 TokenType.ENUM, 120 TokenType.ENUM8, 121 TokenType.ENUM16, 122 } 123 124 TYPE_TOKENS = { 125 TokenType.BIT, 126 TokenType.BOOLEAN, 127 TokenType.TINYINT, 128 TokenType.UTINYINT, 129 TokenType.SMALLINT, 130 TokenType.USMALLINT, 131 TokenType.INT, 132 TokenType.UINT, 133 TokenType.BIGINT, 134 TokenType.UBIGINT, 135 TokenType.INT128, 136 
TokenType.UINT128, 137 TokenType.INT256, 138 TokenType.UINT256, 139 TokenType.MEDIUMINT, 140 TokenType.UMEDIUMINT, 141 TokenType.FIXEDSTRING, 142 TokenType.FLOAT, 143 TokenType.DOUBLE, 144 TokenType.CHAR, 145 TokenType.NCHAR, 146 TokenType.VARCHAR, 147 TokenType.NVARCHAR, 148 TokenType.TEXT, 149 TokenType.MEDIUMTEXT, 150 TokenType.LONGTEXT, 151 TokenType.MEDIUMBLOB, 152 TokenType.LONGBLOB, 153 TokenType.BINARY, 154 TokenType.VARBINARY, 155 TokenType.JSON, 156 TokenType.JSONB, 157 TokenType.INTERVAL, 158 TokenType.TINYBLOB, 159 TokenType.TINYTEXT, 160 TokenType.TIME, 161 TokenType.TIMETZ, 162 TokenType.TIMESTAMP, 163 TokenType.TIMESTAMPTZ, 164 TokenType.TIMESTAMPLTZ, 165 TokenType.DATETIME, 166 TokenType.DATETIME64, 167 TokenType.DATE, 168 TokenType.INT4RANGE, 169 TokenType.INT4MULTIRANGE, 170 TokenType.INT8RANGE, 171 TokenType.INT8MULTIRANGE, 172 TokenType.NUMRANGE, 173 TokenType.NUMMULTIRANGE, 174 TokenType.TSRANGE, 175 TokenType.TSMULTIRANGE, 176 TokenType.TSTZRANGE, 177 TokenType.TSTZMULTIRANGE, 178 TokenType.DATERANGE, 179 TokenType.DATEMULTIRANGE, 180 TokenType.DECIMAL, 181 TokenType.UDECIMAL, 182 TokenType.BIGDECIMAL, 183 TokenType.UUID, 184 TokenType.GEOGRAPHY, 185 TokenType.GEOMETRY, 186 TokenType.HLLSKETCH, 187 TokenType.HSTORE, 188 TokenType.PSEUDO_TYPE, 189 TokenType.SUPER, 190 TokenType.SERIAL, 191 TokenType.SMALLSERIAL, 192 TokenType.BIGSERIAL, 193 TokenType.XML, 194 TokenType.YEAR, 195 TokenType.UNIQUEIDENTIFIER, 196 TokenType.USERDEFINED, 197 TokenType.MONEY, 198 TokenType.SMALLMONEY, 199 TokenType.ROWVERSION, 200 TokenType.IMAGE, 201 TokenType.VARIANT, 202 TokenType.OBJECT, 203 TokenType.OBJECT_IDENTIFIER, 204 TokenType.INET, 205 TokenType.IPADDRESS, 206 TokenType.IPPREFIX, 207 TokenType.UNKNOWN, 208 TokenType.NULL, 209 *ENUM_TYPE_TOKENS, 210 *NESTED_TYPE_TOKENS, 211 } 212 213 SIGNED_TO_UNSIGNED_TYPE_TOKEN = { 214 TokenType.BIGINT: TokenType.UBIGINT, 215 TokenType.INT: TokenType.UINT, 216 TokenType.MEDIUMINT: TokenType.UMEDIUMINT, 217 
TokenType.SMALLINT: TokenType.USMALLINT, 218 TokenType.TINYINT: TokenType.UTINYINT, 219 TokenType.DECIMAL: TokenType.UDECIMAL, 220 } 221 222 SUBQUERY_PREDICATES = { 223 TokenType.ANY: exp.Any, 224 TokenType.ALL: exp.All, 225 TokenType.EXISTS: exp.Exists, 226 TokenType.SOME: exp.Any, 227 } 228 229 RESERVED_KEYWORDS = { 230 *Tokenizer.SINGLE_TOKENS.values(), 231 TokenType.SELECT, 232 } 233 234 DB_CREATABLES = { 235 TokenType.DATABASE, 236 TokenType.SCHEMA, 237 TokenType.TABLE, 238 TokenType.VIEW, 239 TokenType.DICTIONARY, 240 } 241 242 CREATABLES = { 243 TokenType.COLUMN, 244 TokenType.FUNCTION, 245 TokenType.INDEX, 246 TokenType.PROCEDURE, 247 *DB_CREATABLES, 248 } 249 250 # Tokens that can represent identifiers 251 ID_VAR_TOKENS = { 252 TokenType.VAR, 253 TokenType.ANTI, 254 TokenType.APPLY, 255 TokenType.ASC, 256 TokenType.AUTO_INCREMENT, 257 TokenType.BEGIN, 258 TokenType.CACHE, 259 TokenType.CASE, 260 TokenType.COLLATE, 261 TokenType.COMMAND, 262 TokenType.COMMENT, 263 TokenType.COMMIT, 264 TokenType.CONSTRAINT, 265 TokenType.DEFAULT, 266 TokenType.DELETE, 267 TokenType.DESC, 268 TokenType.DESCRIBE, 269 TokenType.DICTIONARY, 270 TokenType.DIV, 271 TokenType.END, 272 TokenType.EXECUTE, 273 TokenType.ESCAPE, 274 TokenType.FALSE, 275 TokenType.FIRST, 276 TokenType.FILTER, 277 TokenType.FORMAT, 278 TokenType.FULL, 279 TokenType.IS, 280 TokenType.ISNULL, 281 TokenType.INTERVAL, 282 TokenType.KEEP, 283 TokenType.KILL, 284 TokenType.LEFT, 285 TokenType.LOAD, 286 TokenType.MERGE, 287 TokenType.NATURAL, 288 TokenType.NEXT, 289 TokenType.OFFSET, 290 TokenType.ORDINALITY, 291 TokenType.OVERLAPS, 292 TokenType.OVERWRITE, 293 TokenType.PARTITION, 294 TokenType.PERCENT, 295 TokenType.PIVOT, 296 TokenType.PRAGMA, 297 TokenType.RANGE, 298 TokenType.REFERENCES, 299 TokenType.RIGHT, 300 TokenType.ROW, 301 TokenType.ROWS, 302 TokenType.SEMI, 303 TokenType.SET, 304 TokenType.SETTINGS, 305 TokenType.SHOW, 306 TokenType.TEMPORARY, 307 TokenType.TOP, 308 TokenType.TRUE, 309 
TokenType.UNIQUE, 310 TokenType.UNPIVOT, 311 TokenType.UPDATE, 312 TokenType.VOLATILE, 313 TokenType.WINDOW, 314 *CREATABLES, 315 *SUBQUERY_PREDICATES, 316 *TYPE_TOKENS, 317 *NO_PAREN_FUNCTIONS, 318 } 319 320 INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END} 321 322 TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - { 323 TokenType.ANTI, 324 TokenType.APPLY, 325 TokenType.ASOF, 326 TokenType.FULL, 327 TokenType.LEFT, 328 TokenType.LOCK, 329 TokenType.NATURAL, 330 TokenType.OFFSET, 331 TokenType.RIGHT, 332 TokenType.SEMI, 333 TokenType.WINDOW, 334 } 335 336 COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS} 337 338 UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET} 339 340 TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"} 341 342 FUNC_TOKENS = { 343 TokenType.COLLATE, 344 TokenType.COMMAND, 345 TokenType.CURRENT_DATE, 346 TokenType.CURRENT_DATETIME, 347 TokenType.CURRENT_TIMESTAMP, 348 TokenType.CURRENT_TIME, 349 TokenType.CURRENT_USER, 350 TokenType.FILTER, 351 TokenType.FIRST, 352 TokenType.FORMAT, 353 TokenType.GLOB, 354 TokenType.IDENTIFIER, 355 TokenType.INDEX, 356 TokenType.ISNULL, 357 TokenType.ILIKE, 358 TokenType.INSERT, 359 TokenType.LIKE, 360 TokenType.MERGE, 361 TokenType.OFFSET, 362 TokenType.PRIMARY_KEY, 363 TokenType.RANGE, 364 TokenType.REPLACE, 365 TokenType.RLIKE, 366 TokenType.ROW, 367 TokenType.UNNEST, 368 TokenType.VAR, 369 TokenType.LEFT, 370 TokenType.RIGHT, 371 TokenType.DATE, 372 TokenType.DATETIME, 373 TokenType.TABLE, 374 TokenType.TIMESTAMP, 375 TokenType.TIMESTAMPTZ, 376 TokenType.WINDOW, 377 TokenType.XOR, 378 *TYPE_TOKENS, 379 *SUBQUERY_PREDICATES, 380 } 381 382 CONJUNCTION = { 383 TokenType.AND: exp.And, 384 TokenType.OR: exp.Or, 385 } 386 387 EQUALITY = { 388 TokenType.EQ: exp.EQ, 389 TokenType.NEQ: exp.NEQ, 390 TokenType.NULLSAFE_EQ: exp.NullSafeEQ, 391 } 392 393 COMPARISON = { 394 TokenType.GT: exp.GT, 395 TokenType.GTE: exp.GTE, 396 TokenType.LT: exp.LT, 397 TokenType.LTE: exp.LTE, 398 } 399 400 BITWISE = { 401 TokenType.AMP: 
exp.BitwiseAnd, 402 TokenType.CARET: exp.BitwiseXor, 403 TokenType.PIPE: exp.BitwiseOr, 404 TokenType.DPIPE: exp.DPipe, 405 } 406 407 TERM = { 408 TokenType.DASH: exp.Sub, 409 TokenType.PLUS: exp.Add, 410 TokenType.MOD: exp.Mod, 411 TokenType.COLLATE: exp.Collate, 412 } 413 414 FACTOR = { 415 TokenType.DIV: exp.IntDiv, 416 TokenType.LR_ARROW: exp.Distance, 417 TokenType.SLASH: exp.Div, 418 TokenType.STAR: exp.Mul, 419 } 420 421 TIMES = { 422 TokenType.TIME, 423 TokenType.TIMETZ, 424 } 425 426 TIMESTAMPS = { 427 TokenType.TIMESTAMP, 428 TokenType.TIMESTAMPTZ, 429 TokenType.TIMESTAMPLTZ, 430 *TIMES, 431 } 432 433 SET_OPERATIONS = { 434 TokenType.UNION, 435 TokenType.INTERSECT, 436 TokenType.EXCEPT, 437 } 438 439 JOIN_METHODS = { 440 TokenType.NATURAL, 441 TokenType.ASOF, 442 } 443 444 JOIN_SIDES = { 445 TokenType.LEFT, 446 TokenType.RIGHT, 447 TokenType.FULL, 448 } 449 450 JOIN_KINDS = { 451 TokenType.INNER, 452 TokenType.OUTER, 453 TokenType.CROSS, 454 TokenType.SEMI, 455 TokenType.ANTI, 456 } 457 458 JOIN_HINTS: t.Set[str] = set() 459 460 LAMBDAS = { 461 TokenType.ARROW: lambda self, expressions: self.expression( 462 exp.Lambda, 463 this=self._replace_lambda( 464 self._parse_conjunction(), 465 {node.name for node in expressions}, 466 ), 467 expressions=expressions, 468 ), 469 TokenType.FARROW: lambda self, expressions: self.expression( 470 exp.Kwarg, 471 this=exp.var(expressions[0].name), 472 expression=self._parse_conjunction(), 473 ), 474 } 475 476 COLUMN_OPERATORS = { 477 TokenType.DOT: None, 478 TokenType.DCOLON: lambda self, this, to: self.expression( 479 exp.Cast if self.STRICT_CAST else exp.TryCast, 480 this=this, 481 to=to, 482 ), 483 TokenType.ARROW: lambda self, this, path: self.expression( 484 exp.JSONExtract, 485 this=this, 486 expression=path, 487 ), 488 TokenType.DARROW: lambda self, this, path: self.expression( 489 exp.JSONExtractScalar, 490 this=this, 491 expression=path, 492 ), 493 TokenType.HASH_ARROW: lambda self, this, path: self.expression( 494 
exp.JSONBExtract, 495 this=this, 496 expression=path, 497 ), 498 TokenType.DHASH_ARROW: lambda self, this, path: self.expression( 499 exp.JSONBExtractScalar, 500 this=this, 501 expression=path, 502 ), 503 TokenType.PLACEHOLDER: lambda self, this, key: self.expression( 504 exp.JSONBContains, 505 this=this, 506 expression=key, 507 ), 508 } 509 510 EXPRESSION_PARSERS = { 511 exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 512 exp.Column: lambda self: self._parse_column(), 513 exp.Condition: lambda self: self._parse_conjunction(), 514 exp.DataType: lambda self: self._parse_types(allow_identifiers=False), 515 exp.Expression: lambda self: self._parse_statement(), 516 exp.From: lambda self: self._parse_from(), 517 exp.Group: lambda self: self._parse_group(), 518 exp.Having: lambda self: self._parse_having(), 519 exp.Identifier: lambda self: self._parse_id_var(), 520 exp.Join: lambda self: self._parse_join(), 521 exp.Lambda: lambda self: self._parse_lambda(), 522 exp.Lateral: lambda self: self._parse_lateral(), 523 exp.Limit: lambda self: self._parse_limit(), 524 exp.Offset: lambda self: self._parse_offset(), 525 exp.Order: lambda self: self._parse_order(), 526 exp.Ordered: lambda self: self._parse_ordered(), 527 exp.Properties: lambda self: self._parse_properties(), 528 exp.Qualify: lambda self: self._parse_qualify(), 529 exp.Returning: lambda self: self._parse_returning(), 530 exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY), 531 exp.Table: lambda self: self._parse_table_parts(), 532 exp.TableAlias: lambda self: self._parse_table_alias(), 533 exp.Where: lambda self: self._parse_where(), 534 exp.Window: lambda self: self._parse_named_window(), 535 exp.With: lambda self: self._parse_with(), 536 "JOIN_TYPE": lambda self: self._parse_join_parts(), 537 } 538 539 STATEMENT_PARSERS = { 540 TokenType.ALTER: lambda self: self._parse_alter(), 541 TokenType.BEGIN: lambda self: self._parse_transaction(), 542 TokenType.CACHE: lambda 
self: self._parse_cache(), 543 TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(), 544 TokenType.COMMENT: lambda self: self._parse_comment(), 545 TokenType.CREATE: lambda self: self._parse_create(), 546 TokenType.DELETE: lambda self: self._parse_delete(), 547 TokenType.DESC: lambda self: self._parse_describe(), 548 TokenType.DESCRIBE: lambda self: self._parse_describe(), 549 TokenType.DROP: lambda self: self._parse_drop(), 550 TokenType.INSERT: lambda self: self._parse_insert(), 551 TokenType.KILL: lambda self: self._parse_kill(), 552 TokenType.LOAD: lambda self: self._parse_load(), 553 TokenType.MERGE: lambda self: self._parse_merge(), 554 TokenType.PIVOT: lambda self: self._parse_simplified_pivot(), 555 TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()), 556 TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(), 557 TokenType.SET: lambda self: self._parse_set(), 558 TokenType.UNCACHE: lambda self: self._parse_uncache(), 559 TokenType.UPDATE: lambda self: self._parse_update(), 560 TokenType.USE: lambda self: self.expression( 561 exp.Use, 562 kind=self._match_texts(("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA")) 563 and exp.var(self._prev.text), 564 this=self._parse_table(schema=False), 565 ), 566 } 567 568 UNARY_PARSERS = { 569 TokenType.PLUS: lambda self: self._parse_unary(), # Unary + is handled as a no-op 570 TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()), 571 TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()), 572 TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()), 573 } 574 575 PRIMARY_PARSERS = { 576 TokenType.STRING: lambda self, token: self.expression( 577 exp.Literal, this=token.text, is_string=True 578 ), 579 TokenType.NUMBER: lambda self, token: self.expression( 580 exp.Literal, this=token.text, is_string=False 581 ), 582 TokenType.STAR: lambda self, _: self.expression( 583 exp.Star, 
**{"except": self._parse_except(), "replace": self._parse_replace()} 584 ), 585 TokenType.NULL: lambda self, _: self.expression(exp.Null), 586 TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True), 587 TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False), 588 TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text), 589 TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text), 590 TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text), 591 TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token), 592 TokenType.NATIONAL_STRING: lambda self, token: self.expression( 593 exp.National, this=token.text 594 ), 595 TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text), 596 TokenType.HEREDOC_STRING: lambda self, token: self.expression( 597 exp.RawString, this=token.text 598 ), 599 TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(), 600 } 601 602 PLACEHOLDER_PARSERS = { 603 TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder), 604 TokenType.PARAMETER: lambda self: self._parse_parameter(), 605 TokenType.COLON: lambda self: self.expression(exp.Placeholder, this=self._prev.text) 606 if self._match(TokenType.NUMBER) or self._match_set(self.ID_VAR_TOKENS) 607 else None, 608 } 609 610 RANGE_PARSERS = { 611 TokenType.BETWEEN: lambda self, this: self._parse_between(this), 612 TokenType.GLOB: binary_range_parser(exp.Glob), 613 TokenType.ILIKE: binary_range_parser(exp.ILike), 614 TokenType.IN: lambda self, this: self._parse_in(this), 615 TokenType.IRLIKE: binary_range_parser(exp.RegexpILike), 616 TokenType.IS: lambda self, this: self._parse_is(this), 617 TokenType.LIKE: binary_range_parser(exp.Like), 618 TokenType.OVERLAPS: binary_range_parser(exp.Overlaps), 619 TokenType.RLIKE: binary_range_parser(exp.RegexpLike), 620 TokenType.SIMILAR_TO: 
binary_range_parser(exp.SimilarTo), 621 TokenType.FOR: lambda self, this: self._parse_comprehension(this), 622 } 623 624 PROPERTY_PARSERS: t.Dict[str, t.Callable] = { 625 "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty), 626 "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty), 627 "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(), 628 "CHARACTER SET": lambda self: self._parse_character_set(), 629 "CHECKSUM": lambda self: self._parse_checksum(), 630 "CLUSTER BY": lambda self: self._parse_cluster(), 631 "CLUSTERED": lambda self: self._parse_clustered_by(), 632 "COLLATE": lambda self: self._parse_property_assignment(exp.CollateProperty), 633 "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty), 634 "COPY": lambda self: self._parse_copy_property(), 635 "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs), 636 "DEFINER": lambda self: self._parse_definer(), 637 "DETERMINISTIC": lambda self: self.expression( 638 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 639 ), 640 "DISTKEY": lambda self: self._parse_distkey(), 641 "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty), 642 "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty), 643 "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty), 644 "EXTERNAL": lambda self: self.expression(exp.ExternalProperty), 645 "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs), 646 "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 647 "FREESPACE": lambda self: self._parse_freespace(), 648 "HEAP": lambda self: self.expression(exp.HeapProperty), 649 "IMMUTABLE": lambda self: self.expression( 650 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 651 ), 652 "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs), 653 "LANGUAGE": lambda self: 
self._parse_property_assignment(exp.LanguageProperty), 654 "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"), 655 "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"), 656 "LIKE": lambda self: self._parse_create_like(), 657 "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty), 658 "LOCK": lambda self: self._parse_locking(), 659 "LOCKING": lambda self: self._parse_locking(), 660 "LOG": lambda self, **kwargs: self._parse_log(**kwargs), 661 "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty), 662 "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs), 663 "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True), 664 "NO": lambda self: self._parse_no_property(), 665 "ON": lambda self: self._parse_on_property(), 666 "ORDER BY": lambda self: self._parse_order(skip_order_token=True), 667 "PARTITION BY": lambda self: self._parse_partitioned_by(), 668 "PARTITIONED BY": lambda self: self._parse_partitioned_by(), 669 "PARTITIONED_BY": lambda self: self._parse_partitioned_by(), 670 "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True), 671 "RANGE": lambda self: self._parse_dict_range(this="RANGE"), 672 "RETURNS": lambda self: self._parse_returns(), 673 "ROW": lambda self: self._parse_row(), 674 "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty), 675 "SAMPLE": lambda self: self.expression( 676 exp.SampleProperty, this=self._match_text_seq("BY") and self._parse_bitwise() 677 ), 678 "SET": lambda self: self.expression(exp.SetProperty, multi=False), 679 "SETTINGS": lambda self: self.expression( 680 exp.SettingsProperty, expressions=self._parse_csv(self._parse_set_item) 681 ), 682 "SORTKEY": lambda self: self._parse_sortkey(), 683 "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"), 684 "STABLE": lambda self: self.expression( 685 exp.StabilityProperty, this=exp.Literal.string("STABLE") 686 ), 687 "STORED": lambda self: 
self._parse_stored(), 688 "TBLPROPERTIES": lambda self: self._parse_wrapped_csv(self._parse_property), 689 "TEMP": lambda self: self.expression(exp.TemporaryProperty), 690 "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty), 691 "TO": lambda self: self._parse_to_table(), 692 "TRANSIENT": lambda self: self.expression(exp.TransientProperty), 693 "TTL": lambda self: self._parse_ttl(), 694 "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 695 "VOLATILE": lambda self: self._parse_volatile_property(), 696 "WITH": lambda self: self._parse_with_property(), 697 } 698 699 CONSTRAINT_PARSERS = { 700 "AUTOINCREMENT": lambda self: self._parse_auto_increment(), 701 "AUTO_INCREMENT": lambda self: self._parse_auto_increment(), 702 "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False), 703 "CHARACTER SET": lambda self: self.expression( 704 exp.CharacterSetColumnConstraint, this=self._parse_var_or_string() 705 ), 706 "CHECK": lambda self: self.expression( 707 exp.CheckColumnConstraint, this=self._parse_wrapped(self._parse_conjunction) 708 ), 709 "COLLATE": lambda self: self.expression( 710 exp.CollateColumnConstraint, this=self._parse_var() 711 ), 712 "COMMENT": lambda self: self.expression( 713 exp.CommentColumnConstraint, this=self._parse_string() 714 ), 715 "COMPRESS": lambda self: self._parse_compress(), 716 "CLUSTERED": lambda self: self.expression( 717 exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 718 ), 719 "NONCLUSTERED": lambda self: self.expression( 720 exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 721 ), 722 "DEFAULT": lambda self: self.expression( 723 exp.DefaultColumnConstraint, this=self._parse_bitwise() 724 ), 725 "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()), 726 "FOREIGN KEY": lambda self: self._parse_foreign_key(), 727 "FORMAT": lambda self: self.expression( 728 
exp.DateFormatColumnConstraint, this=self._parse_var_or_string() 729 ), 730 "GENERATED": lambda self: self._parse_generated_as_identity(), 731 "IDENTITY": lambda self: self._parse_auto_increment(), 732 "INLINE": lambda self: self._parse_inline(), 733 "LIKE": lambda self: self._parse_create_like(), 734 "NOT": lambda self: self._parse_not_constraint(), 735 "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True), 736 "ON": lambda self: ( 737 self._match(TokenType.UPDATE) 738 and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function()) 739 ) 740 or self.expression(exp.OnProperty, this=self._parse_id_var()), 741 "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()), 742 "PRIMARY KEY": lambda self: self._parse_primary_key(), 743 "REFERENCES": lambda self: self._parse_references(match=False), 744 "TITLE": lambda self: self.expression( 745 exp.TitleColumnConstraint, this=self._parse_var_or_string() 746 ), 747 "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]), 748 "UNIQUE": lambda self: self._parse_unique(), 749 "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint), 750 "WITH": lambda self: self.expression( 751 exp.Properties, expressions=self._parse_wrapped_csv(self._parse_property) 752 ), 753 } 754 755 ALTER_PARSERS = { 756 "ADD": lambda self: self._parse_alter_table_add(), 757 "ALTER": lambda self: self._parse_alter_table_alter(), 758 "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()), 759 "DROP": lambda self: self._parse_alter_table_drop(), 760 "RENAME": lambda self: self._parse_alter_table_rename(), 761 } 762 763 SCHEMA_UNNAMED_CONSTRAINTS = {"CHECK", "FOREIGN KEY", "LIKE", "PRIMARY KEY", "UNIQUE"} 764 765 NO_PAREN_FUNCTION_PARSERS = { 766 "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()), 767 "CASE": lambda self: self._parse_case(), 768 "IF": lambda self: self._parse_if(), 769 
"NEXT": lambda self: self._parse_next_value_for(), 770 } 771 772 INVALID_FUNC_NAME_TOKENS = { 773 TokenType.IDENTIFIER, 774 TokenType.STRING, 775 } 776 777 FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"} 778 779 FUNCTION_PARSERS = { 780 "ANY_VALUE": lambda self: self._parse_any_value(), 781 "CAST": lambda self: self._parse_cast(self.STRICT_CAST), 782 "CONCAT": lambda self: self._parse_concat(), 783 "CONCAT_WS": lambda self: self._parse_concat_ws(), 784 "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST), 785 "DECODE": lambda self: self._parse_decode(), 786 "EXTRACT": lambda self: self._parse_extract(), 787 "JSON_OBJECT": lambda self: self._parse_json_object(), 788 "LOG": lambda self: self._parse_logarithm(), 789 "MATCH": lambda self: self._parse_match_against(), 790 "OPENJSON": lambda self: self._parse_open_json(), 791 "POSITION": lambda self: self._parse_position(), 792 "SAFE_CAST": lambda self: self._parse_cast(False), 793 "STRING_AGG": lambda self: self._parse_string_agg(), 794 "SUBSTRING": lambda self: self._parse_substring(), 795 "TRIM": lambda self: self._parse_trim(), 796 "TRY_CAST": lambda self: self._parse_cast(False), 797 "TRY_CONVERT": lambda self: self._parse_convert(False), 798 } 799 800 QUERY_MODIFIER_PARSERS = { 801 TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()), 802 TokenType.WHERE: lambda self: ("where", self._parse_where()), 803 TokenType.GROUP_BY: lambda self: ("group", self._parse_group()), 804 TokenType.HAVING: lambda self: ("having", self._parse_having()), 805 TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()), 806 TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()), 807 TokenType.ORDER_BY: lambda self: ("order", self._parse_order()), 808 TokenType.LIMIT: lambda self: ("limit", self._parse_limit()), 809 TokenType.FETCH: lambda self: ("limit", self._parse_limit()), 810 TokenType.OFFSET: lambda self: ("offset", self._parse_offset()), 811 TokenType.FOR: lambda self: 
("locks", self._parse_locks()), 812 TokenType.LOCK: lambda self: ("locks", self._parse_locks()), 813 TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 814 TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 815 TokenType.CLUSTER_BY: lambda self: ( 816 "cluster", 817 self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 818 ), 819 TokenType.DISTRIBUTE_BY: lambda self: ( 820 "distribute", 821 self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY), 822 ), 823 TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)), 824 TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)), 825 TokenType.START_WITH: lambda self: ("connect", self._parse_connect()), 826 } 827 828 SET_PARSERS = { 829 "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"), 830 "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"), 831 "SESSION": lambda self: self._parse_set_item_assignment("SESSION"), 832 "TRANSACTION": lambda self: self._parse_set_transaction(), 833 } 834 835 SHOW_PARSERS: t.Dict[str, t.Callable] = {} 836 837 TYPE_LITERAL_PARSERS = { 838 exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this), 839 } 840 841 MODIFIABLES = (exp.Subquery, exp.Subqueryable, exp.Table) 842 843 DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN} 844 845 PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE} 846 847 TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"} 848 TRANSACTION_CHARACTERISTICS = { 849 "ISOLATION LEVEL REPEATABLE READ", 850 "ISOLATION LEVEL READ COMMITTED", 851 "ISOLATION LEVEL READ UNCOMMITTED", 852 "ISOLATION LEVEL SERIALIZABLE", 853 "READ WRITE", 854 "READ ONLY", 855 } 856 857 INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"} 858 859 CLONE_KEYWORDS = {"CLONE", "COPY"} 860 CLONE_KINDS = {"TIMESTAMP", "OFFSET", 
"STATEMENT"} 861 862 OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS"} 863 864 TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE} 865 866 WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS} 867 WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER} 868 WINDOW_SIDES = {"FOLLOWING", "PRECEDING"} 869 870 FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT} 871 872 ADD_CONSTRAINT_TOKENS = {TokenType.CONSTRAINT, TokenType.PRIMARY_KEY, TokenType.FOREIGN_KEY} 873 874 DISTINCT_TOKENS = {TokenType.DISTINCT} 875 876 NULL_TOKENS = {TokenType.NULL} 877 878 UNNEST_OFFSET_ALIAS_TOKENS = ID_VAR_TOKENS - SET_OPERATIONS 879 880 STRICT_CAST = True 881 882 # A NULL arg in CONCAT yields NULL by default 883 CONCAT_NULL_OUTPUTS_STRING = False 884 885 PREFIXED_PIVOT_COLUMNS = False 886 IDENTIFY_PIVOT_STRINGS = False 887 888 LOG_BASE_FIRST = True 889 LOG_DEFAULTS_TO_LN = False 890 891 # Whether or not ADD is present for each column added by ALTER TABLE 892 ALTER_TABLE_ADD_COLUMN_KEYWORD = True 893 894 # Whether or not the table sample clause expects CSV syntax 895 TABLESAMPLE_CSV = False 896 897 # Whether or not the SET command needs a delimiter (e.g. 
"=") for assignments 898 SET_REQUIRES_ASSIGNMENT_DELIMITER = True 899 900 # Whether the TRIM function expects the characters to trim as its first argument 901 TRIM_PATTERN_FIRST = False 902 903 __slots__ = ( 904 "error_level", 905 "error_message_context", 906 "max_errors", 907 "sql", 908 "errors", 909 "_tokens", 910 "_index", 911 "_curr", 912 "_next", 913 "_prev", 914 "_prev_comments", 915 "_tokenizer", 916 ) 917 918 # Autofilled 919 TOKENIZER_CLASS: t.Type[Tokenizer] = Tokenizer 920 INDEX_OFFSET: int = 0 921 UNNEST_COLUMN_ONLY: bool = False 922 ALIAS_POST_TABLESAMPLE: bool = False 923 STRICT_STRING_CONCAT = False 924 SUPPORTS_USER_DEFINED_TYPES = True 925 NORMALIZE_FUNCTIONS = "upper" 926 NULL_ORDERING: str = "nulls_are_small" 927 SHOW_TRIE: t.Dict = {} 928 SET_TRIE: t.Dict = {} 929 FORMAT_MAPPING: t.Dict[str, str] = {} 930 FORMAT_TRIE: t.Dict = {} 931 TIME_MAPPING: t.Dict[str, str] = {} 932 TIME_TRIE: t.Dict = {} 933 934 def __init__( 935 self, 936 error_level: t.Optional[ErrorLevel] = None, 937 error_message_context: int = 100, 938 max_errors: int = 3, 939 ): 940 self.error_level = error_level or ErrorLevel.IMMEDIATE 941 self.error_message_context = error_message_context 942 self.max_errors = max_errors 943 self._tokenizer = self.TOKENIZER_CLASS() 944 self.reset() 945 946 def reset(self): 947 self.sql = "" 948 self.errors = [] 949 self._tokens = [] 950 self._index = 0 951 self._curr = None 952 self._next = None 953 self._prev = None 954 self._prev_comments = None 955 956 def parse( 957 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 958 ) -> t.List[t.Optional[exp.Expression]]: 959 """ 960 Parses a list of tokens and returns a list of syntax trees, one tree 961 per parsed SQL statement. 962 963 Args: 964 raw_tokens: The list of tokens. 965 sql: The original SQL string, used to produce helpful debug messages. 966 967 Returns: 968 The list of the produced syntax trees. 
969 """ 970 return self._parse( 971 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 972 ) 973 974 def parse_into( 975 self, 976 expression_types: exp.IntoType, 977 raw_tokens: t.List[Token], 978 sql: t.Optional[str] = None, 979 ) -> t.List[t.Optional[exp.Expression]]: 980 """ 981 Parses a list of tokens into a given Expression type. If a collection of Expression 982 types is given instead, this method will try to parse the token list into each one 983 of them, stopping at the first for which the parsing succeeds. 984 985 Args: 986 expression_types: The expression type(s) to try and parse the token list into. 987 raw_tokens: The list of tokens. 988 sql: The original SQL string, used to produce helpful debug messages. 989 990 Returns: 991 The target Expression. 992 """ 993 errors = [] 994 for expression_type in ensure_list(expression_types): 995 parser = self.EXPRESSION_PARSERS.get(expression_type) 996 if not parser: 997 raise TypeError(f"No parser registered for {expression_type}") 998 999 try: 1000 return self._parse(parser, raw_tokens, sql) 1001 except ParseError as e: 1002 e.errors[0]["into_expression"] = expression_type 1003 errors.append(e) 1004 1005 raise ParseError( 1006 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 1007 errors=merge_errors(errors), 1008 ) from errors[-1] 1009 1010 def _parse( 1011 self, 1012 parse_method: t.Callable[[Parser], t.Optional[exp.Expression]], 1013 raw_tokens: t.List[Token], 1014 sql: t.Optional[str] = None, 1015 ) -> t.List[t.Optional[exp.Expression]]: 1016 self.reset() 1017 self.sql = sql or "" 1018 1019 total = len(raw_tokens) 1020 chunks: t.List[t.List[Token]] = [[]] 1021 1022 for i, token in enumerate(raw_tokens): 1023 if token.token_type == TokenType.SEMICOLON: 1024 if i < total - 1: 1025 chunks.append([]) 1026 else: 1027 chunks[-1].append(token) 1028 1029 expressions = [] 1030 1031 for tokens in chunks: 1032 self._index = -1 1033 self._tokens = tokens 1034 self._advance() 
1035 1036 expressions.append(parse_method(self)) 1037 1038 if self._index < len(self._tokens): 1039 self.raise_error("Invalid expression / Unexpected token") 1040 1041 self.check_errors() 1042 1043 return expressions 1044 1045 def check_errors(self) -> None: 1046 """Logs or raises any found errors, depending on the chosen error level setting.""" 1047 if self.error_level == ErrorLevel.WARN: 1048 for error in self.errors: 1049 logger.error(str(error)) 1050 elif self.error_level == ErrorLevel.RAISE and self.errors: 1051 raise ParseError( 1052 concat_messages(self.errors, self.max_errors), 1053 errors=merge_errors(self.errors), 1054 ) 1055 1056 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 1057 """ 1058 Appends an error in the list of recorded errors or raises it, depending on the chosen 1059 error level setting. 1060 """ 1061 token = token or self._curr or self._prev or Token.string("") 1062 start = token.start 1063 end = token.end + 1 1064 start_context = self.sql[max(start - self.error_message_context, 0) : start] 1065 highlight = self.sql[start:end] 1066 end_context = self.sql[end : end + self.error_message_context] 1067 1068 error = ParseError.new( 1069 f"{message}. Line {token.line}, Col: {token.col}.\n" 1070 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 1071 description=message, 1072 line=token.line, 1073 col=token.col, 1074 start_context=start_context, 1075 highlight=highlight, 1076 end_context=end_context, 1077 ) 1078 1079 if self.error_level == ErrorLevel.IMMEDIATE: 1080 raise error 1081 1082 self.errors.append(error) 1083 1084 def expression( 1085 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 1086 ) -> E: 1087 """ 1088 Creates a new, validated Expression. 1089 1090 Args: 1091 exp_class: The expression class to instantiate. 1092 comments: An optional list of comments to attach to the expression. 1093 kwargs: The arguments to set for the expression along with their respective values. 
1094 1095 Returns: 1096 The target expression. 1097 """ 1098 instance = exp_class(**kwargs) 1099 instance.add_comments(comments) if comments else self._add_comments(instance) 1100 return self.validate_expression(instance) 1101 1102 def _add_comments(self, expression: t.Optional[exp.Expression]) -> None: 1103 if expression and self._prev_comments: 1104 expression.add_comments(self._prev_comments) 1105 self._prev_comments = None 1106 1107 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1108 """ 1109 Validates an Expression, making sure that all its mandatory arguments are set. 1110 1111 Args: 1112 expression: The expression to validate. 1113 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1114 1115 Returns: 1116 The validated expression. 1117 """ 1118 if self.error_level != ErrorLevel.IGNORE: 1119 for error_message in expression.error_messages(args): 1120 self.raise_error(error_message) 1121 1122 return expression 1123 1124 def _find_sql(self, start: Token, end: Token) -> str: 1125 return self.sql[start.start : end.end + 1] 1126 1127 def _advance(self, times: int = 1) -> None: 1128 self._index += times 1129 self._curr = seq_get(self._tokens, self._index) 1130 self._next = seq_get(self._tokens, self._index + 1) 1131 1132 if self._index > 0: 1133 self._prev = self._tokens[self._index - 1] 1134 self._prev_comments = self._prev.comments 1135 else: 1136 self._prev = None 1137 self._prev_comments = None 1138 1139 def _retreat(self, index: int) -> None: 1140 if index != self._index: 1141 self._advance(index - self._index) 1142 1143 def _parse_command(self) -> exp.Command: 1144 return self.expression(exp.Command, this=self._prev.text, expression=self._parse_string()) 1145 1146 def _parse_comment(self, allow_exists: bool = True) -> exp.Expression: 1147 start = self._prev 1148 exists = self._parse_exists() if allow_exists else None 1149 1150 self._match(TokenType.ON) 1151 1152 kind = 
self._match_set(self.CREATABLES) and self._prev 1153 if not kind: 1154 return self._parse_as_command(start) 1155 1156 if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1157 this = self._parse_user_defined_function(kind=kind.token_type) 1158 elif kind.token_type == TokenType.TABLE: 1159 this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS) 1160 elif kind.token_type == TokenType.COLUMN: 1161 this = self._parse_column() 1162 else: 1163 this = self._parse_id_var() 1164 1165 self._match(TokenType.IS) 1166 1167 return self.expression( 1168 exp.Comment, this=this, kind=kind.text, expression=self._parse_string(), exists=exists 1169 ) 1170 1171 def _parse_to_table( 1172 self, 1173 ) -> exp.ToTableProperty: 1174 table = self._parse_table_parts(schema=True) 1175 return self.expression(exp.ToTableProperty, this=table) 1176 1177 # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl 1178 def _parse_ttl(self) -> exp.Expression: 1179 def _parse_ttl_action() -> t.Optional[exp.Expression]: 1180 this = self._parse_bitwise() 1181 1182 if self._match_text_seq("DELETE"): 1183 return self.expression(exp.MergeTreeTTLAction, this=this, delete=True) 1184 if self._match_text_seq("RECOMPRESS"): 1185 return self.expression( 1186 exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise() 1187 ) 1188 if self._match_text_seq("TO", "DISK"): 1189 return self.expression( 1190 exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string() 1191 ) 1192 if self._match_text_seq("TO", "VOLUME"): 1193 return self.expression( 1194 exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string() 1195 ) 1196 1197 return this 1198 1199 expressions = self._parse_csv(_parse_ttl_action) 1200 where = self._parse_where() 1201 group = self._parse_group() 1202 1203 aggregates = None 1204 if group and self._match(TokenType.SET): 1205 aggregates = self._parse_csv(self._parse_set_item) 1206 1207 return self.expression( 1208 
exp.MergeTreeTTL, 1209 expressions=expressions, 1210 where=where, 1211 group=group, 1212 aggregates=aggregates, 1213 ) 1214 1215 def _parse_statement(self) -> t.Optional[exp.Expression]: 1216 if self._curr is None: 1217 return None 1218 1219 if self._match_set(self.STATEMENT_PARSERS): 1220 return self.STATEMENT_PARSERS[self._prev.token_type](self) 1221 1222 if self._match_set(Tokenizer.COMMANDS): 1223 return self._parse_command() 1224 1225 expression = self._parse_expression() 1226 expression = self._parse_set_operations(expression) if expression else self._parse_select() 1227 return self._parse_query_modifiers(expression) 1228 1229 def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command: 1230 start = self._prev 1231 temporary = self._match(TokenType.TEMPORARY) 1232 materialized = self._match_text_seq("MATERIALIZED") 1233 1234 kind = self._match_set(self.CREATABLES) and self._prev.text 1235 if not kind: 1236 return self._parse_as_command(start) 1237 1238 return self.expression( 1239 exp.Drop, 1240 comments=start.comments, 1241 exists=exists or self._parse_exists(), 1242 this=self._parse_table(schema=True), 1243 kind=kind, 1244 temporary=temporary, 1245 materialized=materialized, 1246 cascade=self._match_text_seq("CASCADE"), 1247 constraints=self._match_text_seq("CONSTRAINTS"), 1248 purge=self._match_text_seq("PURGE"), 1249 ) 1250 1251 def _parse_exists(self, not_: bool = False) -> t.Optional[bool]: 1252 return ( 1253 self._match_text_seq("IF") 1254 and (not not_ or self._match(TokenType.NOT)) 1255 and self._match(TokenType.EXISTS) 1256 ) 1257 1258 def _parse_create(self) -> exp.Create | exp.Command: 1259 # Note: this can't be None because we've matched a statement parser 1260 start = self._prev 1261 comments = self._prev_comments 1262 1263 replace = start.text.upper() == "REPLACE" or self._match_pair( 1264 TokenType.OR, TokenType.REPLACE 1265 ) 1266 unique = self._match(TokenType.UNIQUE) 1267 1268 if self._match_pair(TokenType.TABLE, 
TokenType.FUNCTION, advance=False): 1269 self._advance() 1270 1271 properties = None 1272 create_token = self._match_set(self.CREATABLES) and self._prev 1273 1274 if not create_token: 1275 # exp.Properties.Location.POST_CREATE 1276 properties = self._parse_properties() 1277 create_token = self._match_set(self.CREATABLES) and self._prev 1278 1279 if not properties or not create_token: 1280 return self._parse_as_command(start) 1281 1282 exists = self._parse_exists(not_=True) 1283 this = None 1284 expression: t.Optional[exp.Expression] = None 1285 indexes = None 1286 no_schema_binding = None 1287 begin = None 1288 end = None 1289 clone = None 1290 1291 def extend_props(temp_props: t.Optional[exp.Properties]) -> None: 1292 nonlocal properties 1293 if properties and temp_props: 1294 properties.expressions.extend(temp_props.expressions) 1295 elif temp_props: 1296 properties = temp_props 1297 1298 if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1299 this = self._parse_user_defined_function(kind=create_token.token_type) 1300 1301 # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature) 1302 extend_props(self._parse_properties()) 1303 1304 self._match(TokenType.ALIAS) 1305 1306 if self._match(TokenType.COMMAND): 1307 expression = self._parse_as_command(self._prev) 1308 else: 1309 begin = self._match(TokenType.BEGIN) 1310 return_ = self._match_text_seq("RETURN") 1311 1312 if self._match(TokenType.STRING, advance=False): 1313 # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property 1314 # # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement 1315 expression = self._parse_string() 1316 extend_props(self._parse_properties()) 1317 else: 1318 expression = self._parse_statement() 1319 1320 end = self._match_text_seq("END") 1321 1322 if return_: 1323 expression = self.expression(exp.Return, this=expression) 1324 elif create_token.token_type == 
TokenType.INDEX: 1325 this = self._parse_index(index=self._parse_id_var()) 1326 elif create_token.token_type in self.DB_CREATABLES: 1327 table_parts = self._parse_table_parts(schema=True) 1328 1329 # exp.Properties.Location.POST_NAME 1330 self._match(TokenType.COMMA) 1331 extend_props(self._parse_properties(before=True)) 1332 1333 this = self._parse_schema(this=table_parts) 1334 1335 # exp.Properties.Location.POST_SCHEMA and POST_WITH 1336 extend_props(self._parse_properties()) 1337 1338 self._match(TokenType.ALIAS) 1339 if not self._match_set(self.DDL_SELECT_TOKENS, advance=False): 1340 # exp.Properties.Location.POST_ALIAS 1341 extend_props(self._parse_properties()) 1342 1343 expression = self._parse_ddl_select() 1344 1345 if create_token.token_type == TokenType.TABLE: 1346 # exp.Properties.Location.POST_EXPRESSION 1347 extend_props(self._parse_properties()) 1348 1349 indexes = [] 1350 while True: 1351 index = self._parse_index() 1352 1353 # exp.Properties.Location.POST_INDEX 1354 extend_props(self._parse_properties()) 1355 1356 if not index: 1357 break 1358 else: 1359 self._match(TokenType.COMMA) 1360 indexes.append(index) 1361 elif create_token.token_type == TokenType.VIEW: 1362 if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"): 1363 no_schema_binding = True 1364 1365 shallow = self._match_text_seq("SHALLOW") 1366 1367 if self._match_texts(self.CLONE_KEYWORDS): 1368 copy = self._prev.text.lower() == "copy" 1369 clone = self._parse_table(schema=True) 1370 when = self._match_texts({"AT", "BEFORE"}) and self._prev.text.upper() 1371 clone_kind = ( 1372 self._match(TokenType.L_PAREN) 1373 and self._match_texts(self.CLONE_KINDS) 1374 and self._prev.text.upper() 1375 ) 1376 clone_expression = self._match(TokenType.FARROW) and self._parse_bitwise() 1377 self._match(TokenType.R_PAREN) 1378 clone = self.expression( 1379 exp.Clone, 1380 this=clone, 1381 when=when, 1382 kind=clone_kind, 1383 shallow=shallow, 1384 expression=clone_expression, 1385 copy=copy, 1386 ) 
1387 1388 return self.expression( 1389 exp.Create, 1390 comments=comments, 1391 this=this, 1392 kind=create_token.text, 1393 replace=replace, 1394 unique=unique, 1395 expression=expression, 1396 exists=exists, 1397 properties=properties, 1398 indexes=indexes, 1399 no_schema_binding=no_schema_binding, 1400 begin=begin, 1401 end=end, 1402 clone=clone, 1403 ) 1404 1405 def _parse_property_before(self) -> t.Optional[exp.Expression]: 1406 # only used for teradata currently 1407 self._match(TokenType.COMMA) 1408 1409 kwargs = { 1410 "no": self._match_text_seq("NO"), 1411 "dual": self._match_text_seq("DUAL"), 1412 "before": self._match_text_seq("BEFORE"), 1413 "default": self._match_text_seq("DEFAULT"), 1414 "local": (self._match_text_seq("LOCAL") and "LOCAL") 1415 or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"), 1416 "after": self._match_text_seq("AFTER"), 1417 "minimum": self._match_texts(("MIN", "MINIMUM")), 1418 "maximum": self._match_texts(("MAX", "MAXIMUM")), 1419 } 1420 1421 if self._match_texts(self.PROPERTY_PARSERS): 1422 parser = self.PROPERTY_PARSERS[self._prev.text.upper()] 1423 try: 1424 return parser(self, **{k: v for k, v in kwargs.items() if v}) 1425 except TypeError: 1426 self.raise_error(f"Cannot parse property '{self._prev.text}'") 1427 1428 return None 1429 1430 def _parse_property(self) -> t.Optional[exp.Expression]: 1431 if self._match_texts(self.PROPERTY_PARSERS): 1432 return self.PROPERTY_PARSERS[self._prev.text.upper()](self) 1433 1434 if self._match_pair(TokenType.DEFAULT, TokenType.CHARACTER_SET): 1435 return self._parse_character_set(default=True) 1436 1437 if self._match_text_seq("COMPOUND", "SORTKEY"): 1438 return self._parse_sortkey(compound=True) 1439 1440 if self._match_text_seq("SQL", "SECURITY"): 1441 return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER")) 1442 1443 index = self._index 1444 key = self._parse_column() 1445 1446 if not self._match(TokenType.EQ): 1447 self._retreat(index) 1448 
            return None

        return self.expression(
            exp.Property,
            this=key.to_dot() if isinstance(key, exp.Column) else key,
            value=self._parse_column() or self._parse_var(any_token=True),
        )

    def _parse_stored(self) -> exp.FileFormatProperty:
        """Parse STORED AS <format> | STORED AS INPUTFORMAT ... OUTPUTFORMAT ...."""
        self._match(TokenType.ALIAS)

        input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None
        output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None

        return self.expression(
            exp.FileFormatProperty,
            this=self.expression(
                exp.InputOutputFormat, input_format=input_format, output_format=output_format
            )
            if input_format or output_format
            else self._parse_var_or_string() or self._parse_number() or self._parse_id_var(),
        )

    def _parse_property_assignment(self, exp_class: t.Type[E]) -> E:
        """Parse an optional '='/AS followed by a field, wrapped in the given class."""
        self._match(TokenType.EQ)
        self._match(TokenType.ALIAS)
        return self.expression(exp_class, this=self._parse_field())

    def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]:
        """Parse zero or more consecutive properties; returns None when nothing matched."""
        properties = []
        while True:
            if before:
                prop = self._parse_property_before()
            else:
                prop = self._parse_property()

            if not prop:
                break
            # A single parse may yield one property or a list of them
            for p in ensure_list(prop):
                properties.append(p)

        if properties:
            return self.expression(exp.Properties, expressions=properties)

        return None

    def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty:
        return self.expression(
            exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION")
        )

    def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty:
        """Disambiguate VOLATILE: a table property after CREATE/REPLACE/UNIQUE, else UDF stability."""
        if self._index >= 2:
            pre_volatile_token = self._tokens[self._index - 2]
        else:
            pre_volatile_token = None

        if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS:
            return exp.VolatileProperty()

        return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE"))

    def _parse_with_property(
        self,
    ) -> t.Optional[exp.Expression] | t.List[exp.Expression]:
        """Parse the clause following WITH: a wrapped property list or a Teradata WITH-property."""
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_property)

        if self._match_text_seq("JOURNAL"):
            return self._parse_withjournaltable()

        if self._match_text_seq("DATA"):
            return self._parse_withdata(no=False)
        elif self._match_text_seq("NO", "DATA"):
            return self._parse_withdata(no=True)

        if not self._next:
            return None

        return self._parse_withisolatedloading()

    # https://dev.mysql.com/doc/refman/8.0/en/create-view.html
    def _parse_definer(self) -> t.Optional[exp.DefinerProperty]:
        """Parse DEFINER = user@host; returns None when either part is missing."""
        self._match(TokenType.EQ)

        user = self._parse_id_var()
        self._match(TokenType.PARAMETER)
        host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text)

        if not user or not host:
            return None

        return exp.DefinerProperty(this=f"{user}@{host}")

    def _parse_withjournaltable(self) -> exp.WithJournalTableProperty:
        self._match(TokenType.TABLE)
        self._match(TokenType.EQ)
        return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts())

    def _parse_log(self, no: bool = False) -> exp.LogProperty:
        return self.expression(exp.LogProperty, no=no)

    def _parse_journal(self, **kwargs) -> exp.JournalProperty:
        return self.expression(exp.JournalProperty, **kwargs)

    def _parse_checksum(self) -> exp.ChecksumProperty:
        """Parse CHECKSUM = ON | OFF | DEFAULT."""
        self._match(TokenType.EQ)

        on = None
        if self._match(TokenType.ON):
            on = True
        elif self._match_text_seq("OFF"):
            on = False

        return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT))

    def _parse_cluster(self) -> exp.Cluster:
        return self.expression(exp.Cluster, expressions=self._parse_csv(self._parse_ordered))

    def _parse_clustered_by(self) -> exp.ClusteredByProperty:
        """Parse CLUSTERED BY (cols) [SORTED BY (ordered cols)] INTO <n> BUCKETS."""
        self._match_text_seq("BY")

        self._match_l_paren()
        expressions = self._parse_csv(self._parse_column)
        self._match_r_paren()

        if self._match_text_seq("SORTED", "BY"):
            self._match_l_paren()
            sorted_by = self._parse_csv(self._parse_ordered)
            self._match_r_paren()
        else:
            sorted_by = None

        self._match(TokenType.INTO)
        buckets = self._parse_number()
        self._match_text_seq("BUCKETS")

        return self.expression(
            exp.ClusteredByProperty,
            expressions=expressions,
            sorted_by=sorted_by,
            buckets=buckets,
        )

    def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]:
        """Parse COPY GRANTS; rewinds the COPY token when GRANTS does not follow."""
        if not self._match_text_seq("GRANTS"):
            self._retreat(self._index - 1)
            return None

        return self.expression(exp.CopyGrantsProperty)

    def _parse_freespace(self) -> exp.FreespaceProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT)
        )

    def _parse_mergeblockratio(
        self, no: bool = False, default: bool = False
    ) -> exp.MergeBlockRatioProperty:
        if self._match(TokenType.EQ):
            return self.expression(
                exp.MergeBlockRatioProperty,
                this=self._parse_number(),
                percent=self._match(TokenType.PERCENT),
            )

        return self.expression(exp.MergeBlockRatioProperty, no=no, default=default)

    def _parse_datablocksize(
        self,
        default: t.Optional[bool] = None,
        minimum: t.Optional[bool] = None,
        maximum: t.Optional[bool] = None,
    ) -> exp.DataBlocksizeProperty:
        self._match(TokenType.EQ)
        size = self._parse_number()

        units = None
        if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")):
            units = self._prev.text

        return self.expression(
            exp.DataBlocksizeProperty,
            size=size,
            units=units,
            default=default,
            minimum=minimum,
            maximum=maximum,
        )

    def _parse_blockcompression(self) -> exp.BlockCompressionProperty:
        """Parse BLOCKCOMPRESSION = ALWAYS | MANUAL | NEVER | DEFAULT [AUTOTEMP (...)]."""
        self._match(TokenType.EQ)
        always = self._match_text_seq("ALWAYS")
        manual = self._match_text_seq("MANUAL")
        never = self._match_text_seq("NEVER")
        default = self._match_text_seq("DEFAULT")

        autotemp = None
        if self._match_text_seq("AUTOTEMP"):
            autotemp = self._parse_schema()

        return self.expression(
            exp.BlockCompressionProperty,
            always=always,
            manual=manual,
            never=never,
            default=default,
            autotemp=autotemp,
        )

    def _parse_withisolatedloading(self) -> exp.IsolatedLoadingProperty:
        """Parse WITH [NO] [CONCURRENT] ISOLATED LOADING [FOR ALL | FOR INSERT | FOR NONE]."""
        no = self._match_text_seq("NO")
        concurrent = self._match_text_seq("CONCURRENT")
        self._match_text_seq("ISOLATED", "LOADING")
        for_all = self._match_text_seq("FOR", "ALL")
        for_insert = self._match_text_seq("FOR", "INSERT")
        for_none = self._match_text_seq("FOR", "NONE")
        return self.expression(
            exp.IsolatedLoadingProperty,
            no=no,
            concurrent=concurrent,
            for_all=for_all,
            for_insert=for_insert,
            for_none=for_none,
        )

    def _parse_locking(self) -> exp.LockingProperty:
        """Parse a LOCKING property: target kind, optional name, FOR/IN, lock type, OVERRIDE."""
        if self._match(TokenType.TABLE):
            kind = "TABLE"
        elif self._match(TokenType.VIEW):
            kind = "VIEW"
        elif self._match(TokenType.ROW):
            kind = "ROW"
        elif self._match_text_seq("DATABASE"):
            kind = "DATABASE"
        else:
            kind = None

        # Only named objects (not ROW) carry a table name
        if kind in ("DATABASE", "TABLE", "VIEW"):
            this = self._parse_table_parts()
        else:
            this = None

        if self._match(TokenType.FOR):
            for_or_in = "FOR"
        elif self._match(TokenType.IN):
            for_or_in = "IN"
        else:
            for_or_in = None

        if self._match_text_seq("ACCESS"):
            lock_type = "ACCESS"
        elif self._match_texts(("EXCL", "EXCLUSIVE")):
            lock_type = "EXCLUSIVE"
        elif self._match_text_seq("SHARE"):
            lock_type = "SHARE"
        elif self._match_text_seq("READ"):
            lock_type = "READ"
        elif self._match_text_seq("WRITE"):
            lock_type = "WRITE"
        elif self._match_text_seq("CHECKSUM"):
            lock_type = "CHECKSUM"
        else:
            lock_type = None

        override = self._match_text_seq("OVERRIDE")

        return self.expression(
            exp.LockingProperty,
            this=this,
            kind=kind,
            for_or_in=for_or_in,
            lock_type=lock_type,
            override=override,
        )

    def _parse_partition_by(self) -> t.List[exp.Expression]:
        if self._match(TokenType.PARTITION_BY):
            return self._parse_csv(self._parse_conjunction)
        return []

    def _parse_partitioned_by(self) -> exp.PartitionedByProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.PartitionedByProperty,
            this=self._parse_schema() or self._parse_bracket(self._parse_field()),
        )

    def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty:
        """Parse WITH [NO] DATA [AND [NO] STATISTICS]."""
        if self._match_text_seq("AND", "STATISTICS"):
            statistics = True
        elif self._match_text_seq("AND", "NO", "STATISTICS"):
            statistics = False
        else:
            statistics = None

        return self.expression(exp.WithDataProperty, no=no, statistics=statistics)

    def _parse_no_property(self) -> t.Optional[exp.NoPrimaryIndexProperty]:
        if self._match_text_seq("PRIMARY", "INDEX"):
            return exp.NoPrimaryIndexProperty()
        return None

    def _parse_on_property(self) -> t.Optional[exp.Expression]:
        """Parse ON COMMIT PRESERVE|DELETE ROWS, or a generic ON <schema> property."""
        if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"):
            return exp.OnCommitProperty()
        if self._match_text_seq("COMMIT", "DELETE", "ROWS"):
            return exp.OnCommitProperty(delete=True)
        return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var()))

    def _parse_distkey(self) -> exp.DistKeyProperty:
        return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var))

    def _parse_create_like(self) -> t.Optional[exp.LikeProperty]:
        """Parse LIKE <table> [INCLUDING|EXCLUDING <option> ...]."""
        table = self._parse_table(schema=True)

        options = []
        while self._match_texts(("INCLUDING", "EXCLUDING")):
            this = self._prev.text.upper()

            id_var = self._parse_id_var()
            if not id_var:
                return None

            options.append(
                self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper()))
            )

        return self.expression(exp.LikeProperty, this=table, expressions=options)

    def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty:
        return self.expression(
            exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound
        )

    def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default
        )

    def _parse_returns(self) -> exp.ReturnsProperty:
        """Parse RETURNS <type> | RETURNS TABLE [<...>] for UDF definitions."""
        value: t.Optional[exp.Expression]
        is_table = self._match(TokenType.TABLE)

        if is_table:
            if self._match(TokenType.LT):
                # RETURNS TABLE<...> struct-style signature
                value = self.expression(
                    exp.Schema,
                    this="TABLE",
                    expressions=self._parse_csv(self._parse_struct_types),
                )
                if not self._match(TokenType.GT):
                    self.raise_error("Expecting >")
            else:
                value = self._parse_schema(exp.var("TABLE"))
        else:
            value = self._parse_types()

        return self.expression(exp.ReturnsProperty, this=value, is_table=is_table)

    def _parse_describe(self) -> exp.Describe:
        """Parse DESCRIBE [<kind>] <table> with optional trailing properties."""
        kind = self._match_set(self.CREATABLES) and self._prev.text
        this = self._parse_table(schema=True)
        properties = self._parse_properties()
        expressions = properties.expressions if properties else None
        return self.expression(exp.Describe, this=this, kind=kind, expressions=expressions)
    def _parse_insert(self) -> exp.Insert:
        """Parse INSERT (including INSERT OVERWRITE DIRECTORY and INSERT OR <alternative>)."""
        comments = ensure_list(self._prev_comments)
        overwrite = self._match(TokenType.OVERWRITE)
        ignore = self._match(TokenType.IGNORE)
        local = self._match_text_seq("LOCAL")
        alternative = None

        if self._match_text_seq("DIRECTORY"):
            # INSERT [OVERWRITE] [LOCAL] DIRECTORY '<path>' [ROW FORMAT ...]
            this: t.Optional[exp.Expression] = self.expression(
                exp.Directory,
                this=self._parse_var_or_string(),
                local=local,
                row_format=self._parse_row_format(match_row=True),
            )
        else:
            if self._match(TokenType.OR):
                # e.g. INSERT OR REPLACE / OR IGNORE / ...
                alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text

            self._match(TokenType.INTO)
            comments += ensure_list(self._prev_comments)
            self._match(TokenType.TABLE)
            this = self._parse_table(schema=True)

        returning = self._parse_returning()

        # NOTE: the keyword arguments below are evaluated in order, which is what
        # consumes the remaining clauses in their SQL order
        return self.expression(
            exp.Insert,
            comments=comments,
            this=this,
            by_name=self._match_text_seq("BY", "NAME"),
            exists=self._parse_exists(),
            partition=self._parse_partition(),
            where=self._match_pair(TokenType.REPLACE, TokenType.WHERE)
            and self._parse_conjunction(),
            expression=self._parse_ddl_select(),
            conflict=self._parse_on_conflict(),
            returning=returning or self._parse_returning(),
            overwrite=overwrite,
            alternative=alternative,
            ignore=ignore,
        )

    def _parse_kill(self) -> exp.Kill:
        """Parse KILL [CONNECTION | QUERY] <id>."""
        kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None

        return self.expression(
            exp.Kill,
            this=self._parse_primary(),
            kind=kind,
        )

    def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]:
        """Parse ON CONFLICT ... DO NOTHING/UPDATE, or MySQL's ON DUPLICATE KEY."""
        conflict = self._match_text_seq("ON", "CONFLICT")
        duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY")

        if not conflict and not duplicate:
            return None

        nothing = None
        expressions = None
        key = None
        constraint = None

        if conflict:
            if self._match_text_seq("ON", "CONSTRAINT"):
                constraint = self._parse_id_var()
            else:
                key = self._parse_csv(self._parse_value)

        self._match_text_seq("DO")
        if self._match_text_seq("NOTHING"):
            nothing = True
        else:
            self._match(TokenType.UPDATE)
            self._match(TokenType.SET)
            expressions = self._parse_csv(self._parse_equality)

        return self.expression(
            exp.OnConflict,
            duplicate=duplicate,
            expressions=expressions,
            nothing=nothing,
            key=key,
            constraint=constraint,
        )

    def _parse_returning(self) -> t.Optional[exp.Returning]:
        """Parse RETURNING <exprs> [INTO <target>]."""
        if not self._match(TokenType.RETURNING):
            return None
        return self.expression(
            exp.Returning,
            expressions=self._parse_csv(self._parse_expression),
            into=self._match(TokenType.INTO) and self._parse_table_part(),
        )

    def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]:
        # ROW was already consumed by the caller; require FORMAT next
        if not self._match(TokenType.FORMAT):
            return None
        return self._parse_row_format()

    def _parse_row_format(
        self, match_row: bool = False
    ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]:
        """Parse ROW FORMAT SERDE '<class>' or ROW FORMAT DELIMITED with its sub-clauses."""
        if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT):
            return None

        if self._match_text_seq("SERDE"):
            this = self._parse_string()

            serde_properties = None
            if self._match(TokenType.SERDE_PROPERTIES):
                serde_properties = self.expression(
                    exp.SerdeProperties, expressions=self._parse_wrapped_csv(self._parse_property)
                )

            return self.expression(
                exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties
            )

        self._match_text_seq("DELIMITED")

        kwargs = {}

        # Each sub-clause is optional but must appear in this order
        if self._match_text_seq("FIELDS", "TERMINATED", "BY"):
            kwargs["fields"] = self._parse_string()
            if self._match_text_seq("ESCAPED", "BY"):
                kwargs["escaped"] = self._parse_string()
        if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"):
            kwargs["collection_items"] = self._parse_string()
        if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"):
            kwargs["map_keys"] = self._parse_string()
        if self._match_text_seq("LINES", "TERMINATED", "BY"):
            kwargs["lines"] = self._parse_string()
        if self._match_text_seq("NULL", "DEFINED", "AS"):
            kwargs["null"] = self._parse_string()

        return self.expression(exp.RowFormatDelimitedProperty, **kwargs)  # type: ignore

    def _parse_load(self) -> exp.LoadData | exp.Command:
        """Parse LOAD DATA [LOCAL] INPATH ... INTO TABLE ...; otherwise a raw Command."""
        if self._match_text_seq("DATA"):
            local = self._match_text_seq("LOCAL")
            self._match_text_seq("INPATH")
            inpath = self._parse_string()
            overwrite = self._match(TokenType.OVERWRITE)
            self._match_pair(TokenType.INTO, TokenType.TABLE)

            return self.expression(
                exp.LoadData,
                this=self._parse_table(schema=True),
                local=local,
                overwrite=overwrite,
                inpath=inpath,
                partition=self._parse_partition(),
                input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(),
                serde=self._match_text_seq("SERDE") and self._parse_string(),
            )
        return self._parse_as_command(self._prev)

    def _parse_delete(self) -> exp.Delete:
        """Parse DELETE, including the multi-table form that lists tables before FROM."""
        # This handles MySQL's "Multiple-Table Syntax"
        # https://dev.mysql.com/doc/refman/8.0/en/delete.html
        tables = None
        comments = self._prev_comments
        if not self._match(TokenType.FROM, advance=False):
            tables = self._parse_csv(self._parse_table) or None

        returning = self._parse_returning()

        return self.expression(
            exp.Delete,
            comments=comments,
            tables=tables,
            this=self._match(TokenType.FROM) and self._parse_table(joins=True),
            using=self._match(TokenType.USING) and self._parse_table(joins=True),
            where=self._parse_where(),
            returning=returning or self._parse_returning(),
            limit=self._parse_limit(),
        )
def _parse_update(self) -> exp.Update:
    """Parse an UPDATE statement."""
    comments = self._prev_comments
    this = self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS)
    expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality)
    # RETURNING placement varies by dialect; try before and after WHERE/FROM.
    returning = self._parse_returning()
    return self.expression(
        exp.Update,
        comments=comments,
        **{  # type: ignore
            "this": this,
            "expressions": expressions,
            "from": self._parse_from(joins=True),
            "where": self._parse_where(),
            "returning": returning or self._parse_returning(),
            "order": self._parse_order(),
            "limit": self._parse_limit(),
        },
    )

def _parse_uncache(self) -> exp.Uncache:
    """Parse Spark-style `UNCACHE TABLE [IF EXISTS] <table>`."""
    if not self._match(TokenType.TABLE):
        self.raise_error("Expecting TABLE after UNCACHE")

    return self.expression(
        exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True)
    )

def _parse_cache(self) -> exp.Cache:
    """Parse Spark-style `CACHE [LAZY] TABLE <t> [OPTIONS(k = v)] [AS select]`."""
    lazy = self._match_text_seq("LAZY")
    self._match(TokenType.TABLE)
    table = self._parse_table(schema=True)

    # OPTIONS is parsed as a single key/value pair (a [k, v] list).
    options = []
    if self._match_text_seq("OPTIONS"):
        self._match_l_paren()
        k = self._parse_string()
        self._match(TokenType.EQ)
        v = self._parse_string()
        options = [k, v]
        self._match_r_paren()

    self._match(TokenType.ALIAS)
    return self.expression(
        exp.Cache,
        this=table,
        lazy=lazy,
        options=options,
        expression=self._parse_select(nested=True),
    )

def _parse_partition(self) -> t.Optional[exp.Partition]:
    """Parse `PARTITION (<expr>, ...)`, or return None if absent."""
    if not self._match(TokenType.PARTITION):
        return None

    return self.expression(
        exp.Partition, expressions=self._parse_wrapped_csv(self._parse_conjunction)
    )

def _parse_value(self) -> exp.Tuple:
    """Parse one VALUES row, with or without surrounding parentheses."""
    if self._match(TokenType.L_PAREN):
        expressions = self._parse_csv(self._parse_conjunction)
        self._match_r_paren()
        return self.expression(exp.Tuple, expressions=expressions)

    # In presto we can have VALUES 1, 2 which results in 1 column & 2 rows.
    # https://prestodb.io/docs/current/sql/values.html
    return self.expression(exp.Tuple, expressions=[self._parse_conjunction()])

def _parse_projections(self) -> t.List[exp.Expression]:
    """Parse the SELECT projection list (overridable hook for dialects)."""
    return self._parse_expressions()

def _parse_select(
    self, nested: bool = False, table: bool = False, parse_subquery_alias: bool = True
) -> t.Optional[exp.Expression]:
    """Parse a SELECT-like query: WITH-prefixed statements, SELECT bodies,
    parenthesized/nested selects, VALUES, and DuckDB's leading FROM."""
    cte = self._parse_with()

    if cte:
        this = self._parse_statement()

        if not this:
            self.raise_error("Failed to parse any statement following CTE")
            return cte

        if "with" in this.arg_types:
            this.set("with", cte)
        else:
            self.raise_error(f"{this.key} does not support CTE")
            this = cte

        return this

    # duckdb supports leading with FROM x
    from_ = self._parse_from() if self._match(TokenType.FROM, advance=False) else None

    if self._match(TokenType.SELECT):
        comments = self._prev_comments

        hint = self._parse_hint()
        all_ = self._match(TokenType.ALL)
        distinct = self._match_set(self.DISTINCT_TOKENS)

        # BigQuery's SELECT AS STRUCT / AS VALUE.
        kind = (
            self._match(TokenType.ALIAS)
            and self._match_texts(("STRUCT", "VALUE"))
            and self._prev.text
        )

        if distinct:
            distinct = self.expression(
                exp.Distinct,
                on=self._parse_value() if self._match(TokenType.ON) else None,
            )

        if all_ and distinct:
            self.raise_error("Cannot specify both ALL and DISTINCT after SELECT")

        # TOP-style limits precede the projections (e.g. T-SQL SELECT TOP n).
        limit = self._parse_limit(top=True)
        projections = self._parse_projections()

        this = self.expression(
            exp.Select,
            kind=kind,
            hint=hint,
            distinct=distinct,
            expressions=projections,
            limit=limit,
        )
        this.comments = comments

        into = self._parse_into()
        if into:
            this.set("into", into)

        if not from_:
            from_ = self._parse_from()

        if from_:
            this.set("from", from_)

        this = self._parse_query_modifiers(this)
    elif (table or nested) and self._match(TokenType.L_PAREN):
        if self._match(TokenType.PIVOT):
            this = self._parse_simplified_pivot()
        elif self._match(TokenType.FROM):
            this = exp.select("*").from_(
                t.cast(exp.From, self._parse_from(skip_from_token=True))
            )
        else:
            this = self._parse_table() if table else self._parse_select(nested=True)
            this = self._parse_set_operations(self._parse_query_modifiers(this))

        self._match_r_paren()

        # We return early here so that the UNION isn't attached to the subquery by the
        # following call to _parse_set_operations, but instead becomes the parent node
        return self._parse_subquery(this, parse_alias=parse_subquery_alias)
    elif self._match(TokenType.VALUES):
        this = self.expression(
            exp.Values,
            expressions=self._parse_csv(self._parse_value),
            alias=self._parse_table_alias(),
        )
    elif from_:
        # Bare leading FROM (duckdb): expand to SELECT * FROM ...
        this = exp.select("*").from_(from_.this, copy=False)
    else:
        this = None

    return self._parse_set_operations(this)

def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]:
    """Parse a WITH clause and its comma-separated CTEs."""
    if not skip_with_token and not self._match(TokenType.WITH):
        return None

    comments = self._prev_comments
    recursive = self._match(TokenType.RECURSIVE)

    expressions = []
    while True:
        expressions.append(self._parse_cte())

        # Some dialects allow `WITH a AS (...) WITH b AS (...)`; a stray WITH
        # after a comma is also tolerated and consumed.
        if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH):
            break
        else:
            self._match(TokenType.WITH)

    return self.expression(
        exp.With, comments=comments, expressions=expressions, recursive=recursive
    )

def _parse_cte(self) -> exp.CTE:
    """Parse one CTE: `<alias> [(cols)] AS (<statement>)`."""
    alias = self._parse_table_alias()
    if not alias or not alias.this:
        self.raise_error("Expected CTE to have alias")

    self._match(TokenType.ALIAS)
    return self.expression(
        exp.CTE, this=self._parse_wrapped(self._parse_statement), alias=alias
    )

def _parse_table_alias(
    self, alias_tokens: t.Optional[t.Collection[TokenType]] = None
) -> t.Optional[exp.TableAlias]:
    """Parse `[AS] <alias> [(col, ...)]`; None if neither alias nor columns."""
    any_token = self._match(TokenType.ALIAS)
    alias = (
        self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
        or self._parse_string_as_identifier()
    )

    index = self._index
    if self._match(TokenType.L_PAREN):
        columns = self._parse_csv(self._parse_function_parameter)
        # If no columns parsed, the ( belonged to something else — rewind.
        self._match_r_paren() if columns else self._retreat(index)
    else:
        columns = None

    if not alias and not columns:
        return None

    return self.expression(exp.TableAlias, this=alias, columns=columns)

def _parse_subquery(
    self, this: t.Optional[exp.Expression], parse_alias: bool = True
) -> t.Optional[exp.Subquery]:
    """Wrap a parsed query in a Subquery node with optional pivots/alias."""
    if not this:
        return None

    return self.expression(
        exp.Subquery,
        this=this,
        pivots=self._parse_pivots(),
        alias=self._parse_table_alias() if parse_alias else None,
    )

def _parse_query_modifiers(
    self, this: t.Optional[exp.Expression]
) -> t.Optional[exp.Expression]:
    """Attach joins, laterals and clause modifiers (WHERE/GROUP/LIMIT/...)
    onto a modifiable expression, dispatching via QUERY_MODIFIER_PARSERS."""
    if isinstance(this, self.MODIFIABLES):
        for join in iter(self._parse_join, None):
            this.append("joins", join)
        for lateral in iter(self._parse_lateral, None):
            this.append("laterals", lateral)

        while True:
            if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False):
                parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type]
                key, expression = parser(self)

                if expression:
                    this.set(key, expression)
                    if key == "limit":
                        # LIMIT x, y — hoist the parsed offset into its own node.
                        offset = expression.args.pop("offset", None)
                        if offset:
                            this.set("offset", exp.Offset(expression=offset))
                    continue
            break
    return this

def _parse_hint(self) ->
t.Optional[exp.Hint]:
    # (continuation — the `def _parse_hint(self) ->` header sits in the
    # previous chunk line). Parses an Oracle-style /*+ ... */ hint block.
    if self._match(TokenType.HINT):
        hints = []
        # Keep collecting CSV function lists until an empty list is returned.
        for hint in iter(lambda: self._parse_csv(self._parse_function), []):
            hints.extend(hint)

        if not self._match_pair(TokenType.STAR, TokenType.SLASH):
            self.raise_error("Expected */ after HINT")

        return self.expression(exp.Hint, expressions=hints)

    return None

def _parse_into(self) -> t.Optional[exp.Into]:
    """Parse `INTO [TEMPORARY|UNLOGGED] [TABLE] <table>`, or None."""
    if not self._match(TokenType.INTO):
        return None

    temp = self._match(TokenType.TEMPORARY)
    unlogged = self._match_text_seq("UNLOGGED")
    self._match(TokenType.TABLE)

    return self.expression(
        exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged
    )

def _parse_from(
    self, joins: bool = False, skip_from_token: bool = False
) -> t.Optional[exp.From]:
    """Parse a FROM clause; `skip_from_token` when FROM was already consumed."""
    if not skip_from_token and not self._match(TokenType.FROM):
        return None

    return self.expression(
        exp.From, comments=self._prev_comments, this=self._parse_table(joins=joins)
    )

def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]:
    """Parse a MATCH_RECOGNIZE(...) clause (row pattern matching)."""
    if not self._match(TokenType.MATCH_RECOGNIZE):
        return None

    self._match_l_paren()

    partition = self._parse_partition_by()
    order = self._parse_order()
    measures = self._parse_expressions() if self._match_text_seq("MEASURES") else None

    # ONE ROW PER MATCH / ALL ROWS PER MATCH [...] — stored as a raw variable
    # since the exact keyword sequence is echoed back verbatim on generation.
    if self._match_text_seq("ONE", "ROW", "PER", "MATCH"):
        rows = exp.var("ONE ROW PER MATCH")
    elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"):
        text = "ALL ROWS PER MATCH"
        # NOTE(review): the f-prefixes below are redundant (no placeholders).
        if self._match_text_seq("SHOW", "EMPTY", "MATCHES"):
            text += f" SHOW EMPTY MATCHES"
        elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"):
            text += f" OMIT EMPTY MATCHES"
        elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"):
            text += f" WITH UNMATCHED ROWS"
        rows = exp.var(text)
    else:
        rows = None

    if self._match_text_seq("AFTER", "MATCH", "SKIP"):
        text = "AFTER MATCH SKIP"
        if self._match_text_seq("PAST", "LAST", "ROW"):
            text += f" PAST LAST ROW"
        elif self._match_text_seq("TO", "NEXT", "ROW"):
            text += f" TO NEXT ROW"
        elif self._match_text_seq("TO", "FIRST"):
            text += f" TO FIRST {self._advance_any().text}"  # type: ignore
        elif self._match_text_seq("TO", "LAST"):
            text += f" TO LAST {self._advance_any().text}"  # type: ignore
        after = exp.var(text)
    else:
        after = None

    if self._match_text_seq("PATTERN"):
        self._match_l_paren()

        if not self._curr:
            self.raise_error("Expecting )", self._curr)

        # The pattern is an opaque regex-like token soup; scan tokens while
        # tracking paren depth and recover the raw SQL text between them.
        paren = 1
        start = self._curr

        while self._curr and paren > 0:
            if self._curr.token_type == TokenType.L_PAREN:
                paren += 1
            if self._curr.token_type == TokenType.R_PAREN:
                paren -= 1

            end = self._prev
            self._advance()

        if paren > 0:
            self.raise_error("Expecting )", self._curr)

        pattern = exp.var(self._find_sql(start, end))
    else:
        pattern = None

    define = (
        self._parse_csv(
            lambda: self.expression(
                exp.Alias,
                alias=self._parse_id_var(any_token=True),
                this=self._match(TokenType.ALIAS) and self._parse_conjunction(),
            )
        )
        if self._match_text_seq("DEFINE")
        else None
    )

    self._match_r_paren()

    return self.expression(
        exp.MatchRecognize,
        partition_by=partition,
        order=order,
        measures=measures,
        rows=rows,
        after=after,
        pattern=pattern,
        define=define,
        alias=self._parse_table_alias(),
    )

def _parse_lateral(self) -> t.Optional[exp.Lateral]:
    """Parse LATERAL / OUTER APPLY / CROSS APPLY constructs, or None."""
    outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY)
    cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY)

    if outer_apply or cross_apply:
        this = self._parse_select(table=True)
        view = None
        # OUTER APPLY behaves like LEFT (outer) lateral; CROSS APPLY does not.
        outer = not cross_apply
    elif self._match(TokenType.LATERAL):
        this = self._parse_select(table=True)
        view = self._match(TokenType.VIEW)
        outer = self._match(TokenType.OUTER)
    else:
        return None

    if not this:
        # Not a subquery: LATERAL over an unnest/function/identifier,
        # possibly dotted (e.g. schema.func(...)).
        this = (
            self._parse_unnest()
            or self._parse_function()
            or self._parse_id_var(any_token=False)
        )

        while self._match(TokenType.DOT):
            this = exp.Dot(
                this=this,
                expression=self._parse_function() or self._parse_id_var(any_token=False),
            )

    if view:
        table = self._parse_id_var(any_token=False)
        columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else []
        table_alias: t.Optional[exp.TableAlias] = self.expression(
            exp.TableAlias, this=table, columns=columns
        )
    elif isinstance(this, exp.Subquery) and this.alias:
        # Ensures parity between the Subquery's and the Lateral's "alias" args
        table_alias = this.args["alias"].copy()
    else:
        table_alias = self._parse_table_alias()

    return self.expression(exp.Lateral, this=this, view=view, outer=outer, alias=table_alias)

def _parse_join_parts(
    self,
) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]:
    """Return the (method, side, kind) tokens of a join prefix, each optional."""
    return (
        self._match_set(self.JOIN_METHODS) and self._prev,
        self._match_set(self.JOIN_SIDES) and self._prev,
        self._match_set(self.JOIN_KINDS) and self._prev,
    )

def _parse_join(
    self, skip_join_token: bool = False, parse_bracket: bool = False
) -> t.Optional[exp.Join]:
    """Parse one join (comma join, keyword join, or APPLY), or None."""
    if self._match(TokenType.COMMA):
        # Implicit comma join: `FROM a, b`.
        return self.expression(exp.Join, this=self._parse_table())

    index = self._index
    method, side, kind = self._parse_join_parts()
    hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None
    join = self._match(TokenType.JOIN)

    if not skip_join_token and not join:
        # Not actually a join — rewind and clear the speculative prefix.
        self._retreat(index)
        kind = None
        method = None
        side = None
outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False)
    cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False)

    if not skip_join_token and not join and not outer_apply and not cross_apply:
        return None

    if outer_apply:
        # OUTER APPLY is modeled as a LEFT-side join.
        side = Token(TokenType.LEFT, "LEFT")

    kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)}

    if method:
        kwargs["method"] = method.text
    if side:
        kwargs["side"] = side.text
    if kind:
        kwargs["kind"] = kind.text
    if hint:
        kwargs["hint"] = hint

    if self._match(TokenType.ON):
        kwargs["on"] = self._parse_conjunction()
    elif self._match(TokenType.USING):
        kwargs["using"] = self._parse_wrapped_id_vars()
    elif not (kind and kind.token_type == TokenType.CROSS):
        # Nested-join syntax: `a JOIN (b JOIN c) ON ...` — try to parse an
        # inner join whose ON/USING belongs to the outer one; rewind if not.
        index = self._index
        join = self._parse_join()

        if join and self._match(TokenType.ON):
            kwargs["on"] = self._parse_conjunction()
        elif join and self._match(TokenType.USING):
            kwargs["using"] = self._parse_wrapped_id_vars()
        else:
            join = None
            self._retreat(index)

        kwargs["this"].set("joins", [join] if join else None)

    # Preserve comments attached to the join prefix tokens.
    comments = [c for token in (method, side, kind) if token for c in token.comments]
    return self.expression(exp.Join, comments=comments, **kwargs)

def _parse_opclass(self) -> t.Optional[exp.Expression]:
    """Parse an index column expression with an optional operator class."""
    this = self._parse_conjunction()
    if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False):
        # The next word is a keyword (e.g. ASC/NULLS), not an opclass name.
        return this

    opclass = self._parse_var(any_token=True)
    if opclass:
        return self.expression(exp.Opclass, this=this, expression=opclass)

    return this

def _parse_index(
    self,
    index: t.Optional[exp.Expression] = None,
) -> t.Optional[exp.Index]:
    """Parse an index definition.

    Args:
        index: pre-parsed index identifier — when given, the `ON <table>`
            form is expected (the caller already consumed CREATE INDEX <name>).
    """
    if index:
        unique = None
        primary = None
        amp = None

        self._match(TokenType.ON)
        self._match(TokenType.TABLE)  # hive
        table = self._parse_table_parts(schema=True)
    else:
        unique = self._match(TokenType.UNIQUE)
        primary = self._match_text_seq("PRIMARY")
        amp = self._match_text_seq("AMP")

        if not self._match(TokenType.INDEX):
            return None

        index = self._parse_id_var()
        table = None

    using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None

    if self._match(TokenType.L_PAREN, advance=False):
        columns = self._parse_wrapped_csv(lambda: self._parse_ordered(self._parse_opclass))
    else:
        columns = None

    return self.expression(
        exp.Index,
        this=index,
        table=table,
        using=using,
        columns=columns,
        unique=unique,
        primary=primary,
        amp=amp,
        partition_by=self._parse_partition_by(),
        where=self._parse_where(),
    )

def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]:
    """Parse T-SQL WITH(...) or MySQL USE/FORCE/IGNORE INDEX table hints."""
    hints: t.List[exp.Expression] = []
    if self._match_pair(TokenType.WITH, TokenType.L_PAREN):
        # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16
        hints.append(
            self.expression(
                exp.WithTableHint,
                expressions=self._parse_csv(
                    lambda: self._parse_function() or self._parse_var(any_token=True)
                ),
            )
        )
        self._match_r_paren()
    else:
        # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html
        while self._match_set(self.TABLE_INDEX_HINT_TOKENS):
            hint = exp.IndexTableHint(this=self._prev.text.upper())

            self._match_texts({"INDEX", "KEY"})
            if self._match(TokenType.FOR):
                hint.set("target", self._advance_any() and self._prev.text.upper())

            hint.set("expressions", self._parse_wrapped_id_vars())
            hints.append(hint)

    return hints or None

def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]:
    """Parse one dotted component of a table name (function/id/string/param)."""
    return (
        (not schema and self._parse_function(optional_parens=False))
or self._parse_id_var(any_token=False)
        or self._parse_string_as_identifier()
        or self._parse_placeholder()
    )

def _parse_table_parts(self, schema: bool = False) -> exp.Table:
    """Parse a possibly-qualified table name `[catalog.][db.]table[.more...]`."""
    catalog = None
    db = None
    table = self._parse_table_part(schema=schema)

    while self._match(TokenType.DOT):
        if catalog:
            # This allows nesting the table in arbitrarily many dot expressions if needed
            table = self.expression(
                exp.Dot, this=table, expression=self._parse_table_part(schema=schema)
            )
        else:
            # Shift the qualifiers one slot left: table -> db -> catalog.
            catalog = db
            db = table
            table = self._parse_table_part(schema=schema)

    if not table:
        self.raise_error(f"Expected table name but got {self._curr}")

    return self.expression(
        exp.Table, this=table, db=db, catalog=catalog, pivots=self._parse_pivots()
    )

def _parse_table(
    self,
    schema: bool = False,
    joins: bool = False,
    alias_tokens: t.Optional[t.Collection[TokenType]] = None,
    parse_bracket: bool = False,
) -> t.Optional[exp.Expression]:
    """Parse a table factor: lateral, unnest, VALUES, subquery or a plain
    table reference, with optional version/sample/alias/hints/pivots/joins."""
    lateral = self._parse_lateral()
    if lateral:
        return lateral

    unnest = self._parse_unnest()
    if unnest:
        return unnest

    values = self._parse_derived_table_values()
    if values:
        return values

    subquery = self._parse_select(table=True)
    if subquery:
        if not subquery.args.get("pivots"):
            subquery.set("pivots", self._parse_pivots())
        return subquery

    bracket = parse_bracket and self._parse_bracket(None)
    bracket = self.expression(exp.Table, this=bracket) if bracket else None
    this: exp.Expression = bracket or self._parse_table_parts(schema=schema)

    if schema:
        return self._parse_schema(this=this)

    version = self._parse_version()

    if version:
        this.set("version", version)

    # Dialect flag: some dialects put TABLESAMPLE before the alias, some after.
    if self.ALIAS_POST_TABLESAMPLE:
        table_sample = self._parse_table_sample()

    alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
    if alias:
        this.set("alias", alias)

    this.set("hints", self._parse_table_hints())

    if not this.args.get("pivots"):
        this.set("pivots", self._parse_pivots())

    if not self.ALIAS_POST_TABLESAMPLE:
        table_sample = self._parse_table_sample()

    if table_sample:
        # The sample wraps the table node.
        table_sample.set("this", this)
        this = table_sample

    if joins:
        for join in iter(self._parse_join, None):
            this.append("joins", join)

    return this

def _parse_version(self) -> t.Optional[exp.Version]:
    """Parse temporal-table versioning: FOR SYSTEM_TIME / FOR VERSION forms."""
    if self._match(TokenType.TIMESTAMP_SNAPSHOT):
        this = "TIMESTAMP"
    elif self._match(TokenType.VERSION_SNAPSHOT):
        this = "VERSION"
    else:
        return None

    if self._match_set((TokenType.FROM, TokenType.BETWEEN)):
        kind = self._prev.text.upper()
        start = self._parse_bitwise()
        self._match_texts(("TO", "AND"))
        end = self._parse_bitwise()
        expression: t.Optional[exp.Expression] = self.expression(
            exp.Tuple, expressions=[start, end]
        )
    elif self._match_text_seq("CONTAINED", "IN"):
        kind = "CONTAINED IN"
        expression = self.expression(
            exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise)
        )
    elif self._match(TokenType.ALL):
        kind = "ALL"
        expression = None
    else:
        # Default / explicit `AS OF <point>`.
        self._match_text_seq("AS", "OF")
        kind = "AS OF"
        expression = self._parse_type()

    return self.expression(exp.Version, this=this, expression=expression, kind=kind)

def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]:
    """Parse `UNNEST(...) [WITH ORDINALITY] [alias] [WITH OFFSET ...]`."""
    if not self._match(TokenType.UNNEST):
        return None

    expressions = self._parse_wrapped_csv(self._parse_type)
    offset = self._match_pair(TokenType.WITH, TokenType.ORDINALITY)

    alias = self._parse_table_alias() if with_alias else None

    if alias:
        if self.UNNEST_COLUMN_ONLY:
2702 if alias.args.get("columns"): 2703 self.raise_error("Unexpected extra column alias in unnest.") 2704 2705 alias.set("columns", [alias.this]) 2706 alias.set("this", None) 2707 2708 columns = alias.args.get("columns") or [] 2709 if offset and len(expressions) < len(columns): 2710 offset = columns.pop() 2711 2712 if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET): 2713 self._match(TokenType.ALIAS) 2714 offset = self._parse_id_var( 2715 any_token=False, tokens=self.UNNEST_OFFSET_ALIAS_TOKENS 2716 ) or exp.to_identifier("offset") 2717 2718 return self.expression(exp.Unnest, expressions=expressions, alias=alias, offset=offset) 2719 2720 def _parse_derived_table_values(self) -> t.Optional[exp.Values]: 2721 is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES) 2722 if not is_derived and not self._match(TokenType.VALUES): 2723 return None 2724 2725 expressions = self._parse_csv(self._parse_value) 2726 alias = self._parse_table_alias() 2727 2728 if is_derived: 2729 self._match_r_paren() 2730 2731 return self.expression( 2732 exp.Values, expressions=expressions, alias=alias or self._parse_table_alias() 2733 ) 2734 2735 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 2736 if not self._match(TokenType.TABLE_SAMPLE) and not ( 2737 as_modifier and self._match_text_seq("USING", "SAMPLE") 2738 ): 2739 return None 2740 2741 bucket_numerator = None 2742 bucket_denominator = None 2743 bucket_field = None 2744 percent = None 2745 rows = None 2746 size = None 2747 seed = None 2748 2749 kind = ( 2750 self._prev.text if self._prev.token_type == TokenType.TABLE_SAMPLE else "USING SAMPLE" 2751 ) 2752 method = self._parse_var(tokens=(TokenType.ROW,)) 2753 2754 matched_l_paren = self._match(TokenType.L_PAREN) 2755 2756 if self.TABLESAMPLE_CSV: 2757 num = None 2758 expressions = self._parse_csv(self._parse_primary) 2759 else: 2760 expressions = None 2761 num = ( 2762 self._parse_factor() 2763 if 
self._match(TokenType.NUMBER, advance=False) 2764 else self._parse_primary() 2765 ) 2766 2767 if self._match_text_seq("BUCKET"): 2768 bucket_numerator = self._parse_number() 2769 self._match_text_seq("OUT", "OF") 2770 bucket_denominator = bucket_denominator = self._parse_number() 2771 self._match(TokenType.ON) 2772 bucket_field = self._parse_field() 2773 elif self._match_set((TokenType.PERCENT, TokenType.MOD)): 2774 percent = num 2775 elif self._match(TokenType.ROWS): 2776 rows = num 2777 elif num: 2778 size = num 2779 2780 if matched_l_paren: 2781 self._match_r_paren() 2782 2783 if self._match(TokenType.L_PAREN): 2784 method = self._parse_var() 2785 seed = self._match(TokenType.COMMA) and self._parse_number() 2786 self._match_r_paren() 2787 elif self._match_texts(("SEED", "REPEATABLE")): 2788 seed = self._parse_wrapped(self._parse_number) 2789 2790 return self.expression( 2791 exp.TableSample, 2792 expressions=expressions, 2793 method=method, 2794 bucket_numerator=bucket_numerator, 2795 bucket_denominator=bucket_denominator, 2796 bucket_field=bucket_field, 2797 percent=percent, 2798 rows=rows, 2799 size=size, 2800 seed=seed, 2801 kind=kind, 2802 ) 2803 2804 def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]: 2805 return list(iter(self._parse_pivot, None)) or None 2806 2807 def _parse_joins(self) -> t.Optional[t.List[exp.Join]]: 2808 return list(iter(self._parse_join, None)) or None 2809 2810 # https://duckdb.org/docs/sql/statements/pivot 2811 def _parse_simplified_pivot(self) -> exp.Pivot: 2812 def _parse_on() -> t.Optional[exp.Expression]: 2813 this = self._parse_bitwise() 2814 return self._parse_in(this) if self._match(TokenType.IN) else this 2815 2816 this = self._parse_table() 2817 expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on) 2818 using = self._match(TokenType.USING) and self._parse_csv( 2819 lambda: self._parse_alias(self._parse_function()) 2820 ) 2821 group = self._parse_group() 2822 return self.expression( 2823 exp.Pivot, 
this=this, expressions=expressions, using=using, group=group
    )

def _parse_pivot(self) -> t.Optional[exp.Pivot]:
    """Parse a standard PIVOT/UNPIVOT clause, or None (with full rewind)."""
    index = self._index
    include_nulls = None

    if self._match(TokenType.PIVOT):
        unpivot = False
    elif self._match(TokenType.UNPIVOT):
        unpivot = True

        # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax
        if self._match_text_seq("INCLUDE", "NULLS"):
            include_nulls = True
        elif self._match_text_seq("EXCLUDE", "NULLS"):
            include_nulls = False
    else:
        return None

    expressions = []
    field = None

    if not self._match(TokenType.L_PAREN):
        # PIVOT/UNPIVOT keyword without a paren group — not a pivot; rewind.
        self._retreat(index)
        return None

    if unpivot:
        expressions = self._parse_csv(self._parse_column)
    else:
        expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function()))

    if not expressions:
        self.raise_error("Failed to parse PIVOT's aggregation list")

    if not self._match(TokenType.FOR):
        self.raise_error("Expecting FOR")

    value = self._parse_column()

    if not self._match(TokenType.IN):
        self.raise_error("Expecting IN")

    field = self._parse_in(value, alias=True)

    self._match_r_paren()

    pivot = self.expression(
        exp.Pivot,
        expressions=expressions,
        field=field,
        unpivot=unpivot,
        include_nulls=include_nulls,
    )

    # Only the last pivot in a chain may take an alias.
    if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False):
        pivot.set("alias", self._parse_table_alias())

    if not unpivot:
        # Precompute the generated output column names: one per
        # (IN-value, aggregation) pair, ordered per dialect preference.
        names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions))

        columns: t.List[exp.Expression] = []
        for fld in pivot.args["field"].expressions:
            field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name
            for name in names:
                if self.PREFIXED_PIVOT_COLUMNS:
                    name = f"{name}_{field_name}" if name else field_name
                else:
                    name = f"{field_name}_{name}" if name else field_name

                columns.append(exp.to_identifier(name))

        pivot.set("columns", columns)

    return pivot

def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]:
    """Name hook for pivot output columns; dialects may override."""
    return [agg.alias for agg in aggregations]

def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]:
    """Parse a WHERE clause, or None if absent."""
    if not skip_where_token and not self._match(TokenType.WHERE):
        return None

    return self.expression(
        exp.Where, comments=self._prev_comments, this=self._parse_conjunction()
    )

def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]:
    """Parse GROUP BY, including ALL, GROUPING SETS, ROLLUP, CUBE, TOTALS."""
    if not skip_group_by_token and not self._match(TokenType.GROUP_BY):
        return None

    elements = defaultdict(list)

    if self._match(TokenType.ALL):
        return self.expression(exp.Group, all=True)

    while True:
        expressions = self._parse_csv(self._parse_conjunction)
        if expressions:
            elements["expressions"].extend(expressions)

        grouping_sets = self._parse_grouping_sets()
        if grouping_sets:
            elements["grouping_sets"].extend(grouping_sets)

        rollup = None
        cube = None
        totals = None

        # `WITH ROLLUP` / `WITH CUBE` carry no column list of their own;
        # the parenthesized forms do.
        with_ = self._match(TokenType.WITH)
        if self._match(TokenType.ROLLUP):
            rollup = with_ or self._parse_wrapped_csv(self._parse_column)
            elements["rollup"].extend(ensure_list(rollup))

        if self._match(TokenType.CUBE):
            cube = with_ or self._parse_wrapped_csv(self._parse_column)
            elements["cube"].extend(ensure_list(cube))

        if self._match_text_seq("TOTALS"):
            totals = True
            elements["totals"] = True  # type: ignore

        # Loop until no further grouping modifiers were consumed.
        if not (grouping_sets or rollup or cube or totals):
            break

    return self.expression(exp.Group, **elements)  # type: ignore

def _parse_grouping_sets(self) -> t.Optional[t.List[exp.Expression]]:
    if not
self._match(TokenType.GROUPING_SETS): 2952 return None 2953 2954 return self._parse_wrapped_csv(self._parse_grouping_set) 2955 2956 def _parse_grouping_set(self) -> t.Optional[exp.Expression]: 2957 if self._match(TokenType.L_PAREN): 2958 grouping_set = self._parse_csv(self._parse_column) 2959 self._match_r_paren() 2960 return self.expression(exp.Tuple, expressions=grouping_set) 2961 2962 return self._parse_column() 2963 2964 def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]: 2965 if not skip_having_token and not self._match(TokenType.HAVING): 2966 return None 2967 return self.expression(exp.Having, this=self._parse_conjunction()) 2968 2969 def _parse_qualify(self) -> t.Optional[exp.Qualify]: 2970 if not self._match(TokenType.QUALIFY): 2971 return None 2972 return self.expression(exp.Qualify, this=self._parse_conjunction()) 2973 2974 def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]: 2975 if skip_start_token: 2976 start = None 2977 elif self._match(TokenType.START_WITH): 2978 start = self._parse_conjunction() 2979 else: 2980 return None 2981 2982 self._match(TokenType.CONNECT_BY) 2983 self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression( 2984 exp.Prior, this=self._parse_bitwise() 2985 ) 2986 connect = self._parse_conjunction() 2987 self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR") 2988 2989 if not start and self._match(TokenType.START_WITH): 2990 start = self._parse_conjunction() 2991 2992 return self.expression(exp.Connect, start=start, connect=connect) 2993 2994 def _parse_order( 2995 self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False 2996 ) -> t.Optional[exp.Expression]: 2997 if not skip_order_token and not self._match(TokenType.ORDER_BY): 2998 return this 2999 3000 return self.expression( 3001 exp.Order, this=this, expressions=self._parse_csv(self._parse_ordered) 3002 ) 3003 3004 def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]: 
3005 if not self._match(token): 3006 return None 3007 return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered)) 3008 3009 def _parse_ordered(self, parse_method: t.Optional[t.Callable] = None) -> exp.Ordered: 3010 this = parse_method() if parse_method else self._parse_conjunction() 3011 3012 asc = self._match(TokenType.ASC) 3013 desc = self._match(TokenType.DESC) or (asc and False) 3014 3015 is_nulls_first = self._match_text_seq("NULLS", "FIRST") 3016 is_nulls_last = self._match_text_seq("NULLS", "LAST") 3017 3018 nulls_first = is_nulls_first or False 3019 explicitly_null_ordered = is_nulls_first or is_nulls_last 3020 3021 if ( 3022 not explicitly_null_ordered 3023 and ( 3024 (not desc and self.NULL_ORDERING == "nulls_are_small") 3025 or (desc and self.NULL_ORDERING != "nulls_are_small") 3026 ) 3027 and self.NULL_ORDERING != "nulls_are_last" 3028 ): 3029 nulls_first = True 3030 3031 return self.expression(exp.Ordered, this=this, desc=desc, nulls_first=nulls_first) 3032 3033 def _parse_limit( 3034 self, this: t.Optional[exp.Expression] = None, top: bool = False 3035 ) -> t.Optional[exp.Expression]: 3036 if self._match(TokenType.TOP if top else TokenType.LIMIT): 3037 comments = self._prev_comments 3038 if top: 3039 limit_paren = self._match(TokenType.L_PAREN) 3040 expression = self._parse_number() 3041 3042 if limit_paren: 3043 self._match_r_paren() 3044 else: 3045 expression = self._parse_term() 3046 3047 if self._match(TokenType.COMMA): 3048 offset = expression 3049 expression = self._parse_term() 3050 else: 3051 offset = None 3052 3053 limit_exp = self.expression( 3054 exp.Limit, this=this, expression=expression, offset=offset, comments=comments 3055 ) 3056 3057 return limit_exp 3058 3059 if self._match(TokenType.FETCH): 3060 direction = self._match_set((TokenType.FIRST, TokenType.NEXT)) 3061 direction = self._prev.text if direction else "FIRST" 3062 3063 count = self._parse_field(tokens=self.FETCH_TOKENS) 3064 percent = 
self._match(TokenType.PERCENT) 3065 3066 self._match_set((TokenType.ROW, TokenType.ROWS)) 3067 3068 only = self._match_text_seq("ONLY") 3069 with_ties = self._match_text_seq("WITH", "TIES") 3070 3071 if only and with_ties: 3072 self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause") 3073 3074 return self.expression( 3075 exp.Fetch, 3076 direction=direction, 3077 count=count, 3078 percent=percent, 3079 with_ties=with_ties, 3080 ) 3081 3082 return this 3083 3084 def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 3085 if not self._match(TokenType.OFFSET): 3086 return this 3087 3088 count = self._parse_term() 3089 self._match_set((TokenType.ROW, TokenType.ROWS)) 3090 return self.expression(exp.Offset, this=this, expression=count) 3091 3092 def _parse_locks(self) -> t.List[exp.Lock]: 3093 locks = [] 3094 while True: 3095 if self._match_text_seq("FOR", "UPDATE"): 3096 update = True 3097 elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq( 3098 "LOCK", "IN", "SHARE", "MODE" 3099 ): 3100 update = False 3101 else: 3102 break 3103 3104 expressions = None 3105 if self._match_text_seq("OF"): 3106 expressions = self._parse_csv(lambda: self._parse_table(schema=True)) 3107 3108 wait: t.Optional[bool | exp.Expression] = None 3109 if self._match_text_seq("NOWAIT"): 3110 wait = True 3111 elif self._match_text_seq("WAIT"): 3112 wait = self._parse_primary() 3113 elif self._match_text_seq("SKIP", "LOCKED"): 3114 wait = False 3115 3116 locks.append( 3117 self.expression(exp.Lock, update=update, expressions=expressions, wait=wait) 3118 ) 3119 3120 return locks 3121 3122 def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3123 if not self._match_set(self.SET_OPERATIONS): 3124 return this 3125 3126 token_type = self._prev.token_type 3127 3128 if token_type == TokenType.UNION: 3129 expression = exp.Union 3130 elif token_type == TokenType.EXCEPT: 3131 expression = 
    def _parse_expression(self) -> t.Optional[exp.Expression]:
        """Parse a single scalar expression with an optional trailing alias."""
        return self._parse_alias(self._parse_conjunction())

    def _parse_conjunction(self) -> t.Optional[exp.Expression]:
        # AND/OR level of the precedence ladder; delegates operands to equality.
        return self._parse_tokens(self._parse_equality, self.CONJUNCTION)

    def _parse_equality(self) -> t.Optional[exp.Expression]:
        # Equality operators (from self.EQUALITY); operands come from comparison.
        return self._parse_tokens(self._parse_comparison, self.COMPARISON) if False else self._parse_tokens(self._parse_comparison, self.EQUALITY)

    def _parse_comparison(self) -> t.Optional[exp.Expression]:
        # Comparison operators (from self.COMPARISON); operands come from range
        # predicates (BETWEEN, IN, IS, ...).
        return self._parse_tokens(self._parse_range, self.COMPARISON)
3169 # https://blog.andreiavram.ro/postgresql-null-composite-type/ 3170 if self._match(TokenType.NOTNULL): 3171 this = self.expression(exp.Is, this=this, expression=exp.Null()) 3172 this = self.expression(exp.Not, this=this) 3173 3174 if negate: 3175 this = self.expression(exp.Not, this=this) 3176 3177 if self._match(TokenType.IS): 3178 this = self._parse_is(this) 3179 3180 return this 3181 3182 def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3183 index = self._index - 1 3184 negate = self._match(TokenType.NOT) 3185 3186 if self._match_text_seq("DISTINCT", "FROM"): 3187 klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ 3188 return self.expression(klass, this=this, expression=self._parse_conjunction()) 3189 3190 expression = self._parse_null() or self._parse_boolean() 3191 if not expression: 3192 self._retreat(index) 3193 return None 3194 3195 this = self.expression(exp.Is, this=this, expression=expression) 3196 return self.expression(exp.Not, this=this) if negate else this 3197 3198 def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In: 3199 unnest = self._parse_unnest(with_alias=False) 3200 if unnest: 3201 this = self.expression(exp.In, this=this, unnest=unnest) 3202 elif self._match(TokenType.L_PAREN): 3203 expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias)) 3204 3205 if len(expressions) == 1 and isinstance(expressions[0], exp.Subqueryable): 3206 this = self.expression(exp.In, this=this, query=expressions[0]) 3207 else: 3208 this = self.expression(exp.In, this=this, expressions=expressions) 3209 3210 self._match_r_paren(this) 3211 else: 3212 this = self.expression(exp.In, this=this, field=self._parse_field()) 3213 3214 return this 3215 3216 def _parse_between(self, this: exp.Expression) -> exp.Between: 3217 low = self._parse_bitwise() 3218 self._match(TokenType.AND) 3219 high = self._parse_bitwise() 3220 return self.expression(exp.Between, this=this, low=low, 
high=high) 3221 3222 def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3223 if not self._match(TokenType.ESCAPE): 3224 return this 3225 return self.expression(exp.Escape, this=this, expression=self._parse_string()) 3226 3227 def _parse_interval(self) -> t.Optional[exp.Interval]: 3228 index = self._index 3229 3230 if not self._match(TokenType.INTERVAL): 3231 return None 3232 3233 if self._match(TokenType.STRING, advance=False): 3234 this = self._parse_primary() 3235 else: 3236 this = self._parse_term() 3237 3238 if not this: 3239 self._retreat(index) 3240 return None 3241 3242 unit = self._parse_function() or self._parse_var(any_token=True) 3243 3244 # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse 3245 # each INTERVAL expression into this canonical form so it's easy to transpile 3246 if this and this.is_number: 3247 this = exp.Literal.string(this.name) 3248 elif this and this.is_string: 3249 parts = this.name.split() 3250 3251 if len(parts) == 2: 3252 if unit: 3253 # This is not actually a unit, it's something else (e.g. 
a "window side") 3254 unit = None 3255 self._retreat(self._index - 1) 3256 3257 this = exp.Literal.string(parts[0]) 3258 unit = self.expression(exp.Var, this=parts[1]) 3259 3260 return self.expression(exp.Interval, this=this, unit=unit) 3261 3262 def _parse_bitwise(self) -> t.Optional[exp.Expression]: 3263 this = self._parse_term() 3264 3265 while True: 3266 if self._match_set(self.BITWISE): 3267 this = self.expression( 3268 self.BITWISE[self._prev.token_type], 3269 this=this, 3270 expression=self._parse_term(), 3271 ) 3272 elif self._match(TokenType.DQMARK): 3273 this = self.expression(exp.Coalesce, this=this, expressions=self._parse_term()) 3274 elif self._match_pair(TokenType.LT, TokenType.LT): 3275 this = self.expression( 3276 exp.BitwiseLeftShift, this=this, expression=self._parse_term() 3277 ) 3278 elif self._match_pair(TokenType.GT, TokenType.GT): 3279 this = self.expression( 3280 exp.BitwiseRightShift, this=this, expression=self._parse_term() 3281 ) 3282 else: 3283 break 3284 3285 return this 3286 3287 def _parse_term(self) -> t.Optional[exp.Expression]: 3288 return self._parse_tokens(self._parse_factor, self.TERM) 3289 3290 def _parse_factor(self) -> t.Optional[exp.Expression]: 3291 return self._parse_tokens(self._parse_unary, self.FACTOR) 3292 3293 def _parse_unary(self) -> t.Optional[exp.Expression]: 3294 if self._match_set(self.UNARY_PARSERS): 3295 return self.UNARY_PARSERS[self._prev.token_type](self) 3296 return self._parse_at_time_zone(self._parse_type()) 3297 3298 def _parse_type(self, parse_interval: bool = True) -> t.Optional[exp.Expression]: 3299 interval = parse_interval and self._parse_interval() 3300 if interval: 3301 return interval 3302 3303 index = self._index 3304 data_type = self._parse_types(check_func=True, allow_identifiers=False) 3305 this = self._parse_column() 3306 3307 if data_type: 3308 if isinstance(this, exp.Literal): 3309 parser = self.TYPE_LITERAL_PARSERS.get(data_type.this) 3310 if parser: 3311 return parser(self, this, 
    def _parse_types(
        self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True
    ) -> t.Optional[exp.Expression]:
        """Parse a data type, returning a DataType (or PseudoType / ObjectIdentifier /
        Interval) expression, or None (after retreating) when no type is present.

        Args:
            check_func: when True, bail out (retreat) if the parenthesized form is
                immediately followed by a string — it is then likely a function call,
                not a type.
            schema: propagated into nested type parsing.
            allow_identifiers: when True, a plain identifier may be re-tokenized and
                treated as a type name (or a user-defined type if the dialect
                supports them).
        """
        index = self._index

        # Teradata-style SYSUDTLIB. prefix before the type name.
        prefix = self._match_text_seq("SYSUDTLIB", ".")

        if not self._match_set(self.TYPE_TOKENS):
            identifier = allow_identifiers and self._parse_id_var(
                any_token=False, tokens=(TokenType.VAR,)
            )

            if identifier:
                # Re-tokenize the identifier text: a quoted identifier may actually
                # spell a known type keyword.
                tokens = self._tokenizer.tokenize(identifier.name)

                if len(tokens) != 1:
                    self.raise_error("Unexpected identifier", self._prev)

                if tokens[0].token_type in self.TYPE_TOKENS:
                    self._prev = tokens[0]
                elif self.SUPPORTS_USER_DEFINED_TYPES:
                    type_name = identifier.name

                    # Consume a dotted path, e.g. schema-qualified UDT names.
                    while self._match(TokenType.DOT):
                        type_name = f"{type_name}.{self._advance_any() and self._prev.text}"

                    return exp.DataType.build(type_name, udt=True)
                else:
                    return None
            else:
                return None

        type_token = self._prev.token_type

        if type_token == TokenType.PSEUDO_TYPE:
            return self.expression(exp.PseudoType, this=self._prev.text)

        if type_token == TokenType.OBJECT_IDENTIFIER:
            return self.expression(exp.ObjectIdentifier, this=self._prev.text)

        nested = type_token in self.NESTED_TYPE_TOKENS
        is_struct = type_token in self.STRUCT_TYPE_TOKENS
        expressions = None
        maybe_func = False

        # Parenthesized arguments: struct fields, nested element types, enum
        # members, or plain size parameters, depending on the type token.
        if self._match(TokenType.L_PAREN):
            if is_struct:
                expressions = self._parse_csv(self._parse_struct_types)
            elif nested:
                expressions = self._parse_csv(
                    lambda: self._parse_types(
                        check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                    )
                )
            elif type_token in self.ENUM_TYPE_TOKENS:
                expressions = self._parse_csv(self._parse_equality)
            else:
                expressions = self._parse_csv(self._parse_type_size)

            if not expressions or not self._match(TokenType.R_PAREN):
                # Not a valid type application after all — rewind everything.
                self._retreat(index)
                return None

            # A name followed by (...) might still be a function call; remember that.
            maybe_func = True

        this: t.Optional[exp.Expression] = None
        values: t.Optional[t.List[exp.Expression]] = None

        # Angle-bracket form for nested types, e.g. ARRAY<INT>, STRUCT<a INT>.
        if nested and self._match(TokenType.LT):
            if is_struct:
                expressions = self._parse_csv(self._parse_struct_types)
            else:
                expressions = self._parse_csv(
                    lambda: self._parse_types(
                        check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                    )
                )

            if not self._match(TokenType.GT):
                self.raise_error("Expecting >")

            # Optional literal values following the type, e.g. ARRAY<INT>[1, 2].
            if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)):
                values = self._parse_csv(self._parse_conjunction)
                self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN))

        if type_token in self.TIMESTAMPS:
            # WITH/WITHOUT TIME ZONE variants; any of these rule out a function call.
            if self._match_text_seq("WITH", "TIME", "ZONE"):
                maybe_func = False
                tz_type = (
                    exp.DataType.Type.TIMETZ
                    if type_token in self.TIMES
                    else exp.DataType.Type.TIMESTAMPTZ
                )
                this = exp.DataType(this=tz_type, expressions=expressions)
            elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"):
                maybe_func = False
                this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions)
            elif self._match_text_seq("WITHOUT", "TIME", "ZONE"):
                maybe_func = False
        elif type_token == TokenType.INTERVAL:
            unit = self._parse_var()

            # INTERVAL <unit> TO <unit> becomes an IntervalSpan inside a DataType.
            if self._match_text_seq("TO"):
                span = [exp.IntervalSpan(this=unit, expression=self._parse_var())]
            else:
                span = None

            if span or not unit:
                this = self.expression(
                    exp.DataType, this=exp.DataType.Type.INTERVAL, expressions=span
                )
            else:
                this = self.expression(exp.Interval, unit=unit)

        if maybe_func and check_func:
            # Peek: TYPE(...) directly followed by a string literal suggests this
            # was a function call, not a cast/type — rewind and give up.
            index2 = self._index
            peek = self._parse_string()

            if not peek:
                self._retreat(index)
                return None

            self._retreat(index2)

        if not this:
            if self._match_text_seq("UNSIGNED"):
                unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token)
                if not unsigned_type_token:
                    self.raise_error(f"Cannot convert {type_token.value} to unsigned.")

                type_token = unsigned_type_token or type_token

            this = exp.DataType(
                this=exp.DataType.Type[type_token.value],
                expressions=expressions,
                nested=nested,
                values=values,
                prefix=prefix,
            )

        # Trailing [] pairs wrap the type in ARRAY, once per pair (e.g. INT[][]).
        while self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET):
            this = exp.DataType(this=exp.DataType.Type.ARRAY, expressions=[this], nested=True)

        return this
    def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Apply trailing column operators to `this`: brackets, casts (::), dots, etc.

        Loops while a token from self.COLUMN_OPERATORS follows, folding each
        operator and its right-hand field into `this`.
        """
        this = self._parse_bracket(this)

        while self._match_set(self.COLUMN_OPERATORS):
            op_token = self._prev.token_type
            op = self.COLUMN_OPERATORS.get(op_token)

            if op_token == TokenType.DCOLON:
                # `expr::type` — the right-hand side must be a type.
                field = self._parse_types()
                if not field:
                    self.raise_error("Expected type")
            elif op and self._curr:
                # Operator with a registered handler: consume the next token as a
                # literal (number or string) operand.
                self._advance()
                value = self._prev.text
                field = (
                    exp.Literal.number(value)
                    if self._prev.token_type == TokenType.NUMBER
                    else exp.Literal.string(value)
                )
            else:
                field = self._parse_field(anonymous_func=True, any_token=True)

            if isinstance(field, exp.Func):
                # bigquery allows function calls like x.y.count(...)
                # SAFE.SUBSTR(...)
                # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules
                this = self._replace_columns_with_dots(this)

            if op:
                this = op(self, this, field)
            elif isinstance(this, exp.Column) and not this.args.get("catalog"):
                # A dot after a column shifts its parts: column becomes table,
                # table becomes db, db becomes catalog.
                this = self.expression(
                    exp.Column,
                    this=field,
                    table=this.this,
                    db=this.args.get("table"),
                    catalog=this.args.get("db"),
                )
            else:
                this = self.expression(exp.Dot, this=this, expression=field)
            this = self._parse_bracket(this)
        return this
= self._prev_comments 3555 query = self._parse_select() 3556 3557 if query: 3558 expressions = [query] 3559 else: 3560 expressions = self._parse_expressions() 3561 3562 this = self._parse_query_modifiers(seq_get(expressions, 0)) 3563 3564 if isinstance(this, exp.Subqueryable): 3565 this = self._parse_set_operations( 3566 self._parse_subquery(this=this, parse_alias=False) 3567 ) 3568 elif len(expressions) > 1: 3569 this = self.expression(exp.Tuple, expressions=expressions) 3570 else: 3571 this = self.expression(exp.Paren, this=self._parse_set_operations(this)) 3572 3573 if this: 3574 this.add_comments(comments) 3575 3576 self._match_r_paren(expression=this) 3577 return this 3578 3579 return None 3580 3581 def _parse_field( 3582 self, 3583 any_token: bool = False, 3584 tokens: t.Optional[t.Collection[TokenType]] = None, 3585 anonymous_func: bool = False, 3586 ) -> t.Optional[exp.Expression]: 3587 return ( 3588 self._parse_primary() 3589 or self._parse_function(anonymous=anonymous_func) 3590 or self._parse_id_var(any_token=any_token, tokens=tokens) 3591 ) 3592 3593 def _parse_function( 3594 self, 3595 functions: t.Optional[t.Dict[str, t.Callable]] = None, 3596 anonymous: bool = False, 3597 optional_parens: bool = True, 3598 ) -> t.Optional[exp.Expression]: 3599 if not self._curr: 3600 return None 3601 3602 token_type = self._curr.token_type 3603 this = self._curr.text 3604 upper = this.upper() 3605 3606 parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper) 3607 if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS: 3608 self._advance() 3609 return parser(self) 3610 3611 if not self._next or self._next.token_type != TokenType.L_PAREN: 3612 if optional_parens and token_type in self.NO_PAREN_FUNCTIONS: 3613 self._advance() 3614 return self.expression(self.NO_PAREN_FUNCTIONS[token_type]) 3615 3616 return None 3617 3618 if token_type not in self.FUNC_TOKENS: 3619 return None 3620 3621 self._advance(2) 3622 3623 parser = 
self.FUNCTION_PARSERS.get(upper) 3624 if parser and not anonymous: 3625 this = parser(self) 3626 else: 3627 subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type) 3628 3629 if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH): 3630 this = self.expression(subquery_predicate, this=self._parse_select()) 3631 self._match_r_paren() 3632 return this 3633 3634 if functions is None: 3635 functions = self.FUNCTIONS 3636 3637 function = functions.get(upper) 3638 3639 alias = upper in self.FUNCTIONS_WITH_ALIASED_ARGS 3640 args = self._parse_csv(lambda: self._parse_lambda(alias=alias)) 3641 3642 if function and not anonymous: 3643 func = self.validate_expression(function(args), args) 3644 if not self.NORMALIZE_FUNCTIONS: 3645 func.meta["name"] = this 3646 this = func 3647 else: 3648 this = self.expression(exp.Anonymous, this=this, expressions=args) 3649 3650 self._match_r_paren(this) 3651 return self._parse_window(this) 3652 3653 def _parse_function_parameter(self) -> t.Optional[exp.Expression]: 3654 return self._parse_column_def(self._parse_id_var()) 3655 3656 def _parse_user_defined_function( 3657 self, kind: t.Optional[TokenType] = None 3658 ) -> t.Optional[exp.Expression]: 3659 this = self._parse_id_var() 3660 3661 while self._match(TokenType.DOT): 3662 this = self.expression(exp.Dot, this=this, expression=self._parse_id_var()) 3663 3664 if not self._match(TokenType.L_PAREN): 3665 return this 3666 3667 expressions = self._parse_csv(self._parse_function_parameter) 3668 self._match_r_paren() 3669 return self.expression( 3670 exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True 3671 ) 3672 3673 def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier: 3674 literal = self._parse_primary() 3675 if literal: 3676 return self.expression(exp.Introducer, this=token.text, expression=literal) 3677 3678 return self.expression(exp.Identifier, this=token.text) 3679 3680 def _parse_session_parameter(self) -> 
    def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]:
        """Parse a lambda (e.g. `(x, y) -> expr` or `x -> expr`) if one is present;
        otherwise fall back to a DISTINCT list or a regular select/expression,
        with optional ORDER BY / LIMIT tails.
        """
        index = self._index

        if self._match(TokenType.L_PAREN):
            expressions = t.cast(
                t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_id_var)
            )

            if not self._match(TokenType.R_PAREN):
                # Unbalanced paren: this wasn't a lambda parameter list — rewind.
                self._retreat(index)
        else:
            expressions = [self._parse_id_var()]

        if self._match_set(self.LAMBDAS):
            return self.LAMBDAS[self._prev.token_type](self, expressions)

        # No lambda arrow followed — undo everything consumed above.
        self._retreat(index)

        this: t.Optional[exp.Expression]

        if self._match(TokenType.DISTINCT):
            this = self.expression(
                exp.Distinct, expressions=self._parse_csv(self._parse_conjunction)
            )
        else:
            this = self._parse_select_or_expression(alias=alias)

        return self._parse_limit(self._parse_order(self._parse_respect_or_ignore_nulls(this)))
-> t.Optional[exp.Expression]: 3744 # column defs are not really columns, they're identifiers 3745 if isinstance(this, exp.Column): 3746 this = this.this 3747 3748 kind = self._parse_types(schema=True) 3749 3750 if self._match_text_seq("FOR", "ORDINALITY"): 3751 return self.expression(exp.ColumnDef, this=this, ordinality=True) 3752 3753 constraints: t.List[exp.Expression] = [] 3754 3755 if not kind and self._match(TokenType.ALIAS): 3756 constraints.append( 3757 self.expression( 3758 exp.ComputedColumnConstraint, 3759 this=self._parse_conjunction(), 3760 persisted=self._match_text_seq("PERSISTED"), 3761 not_null=self._match_pair(TokenType.NOT, TokenType.NULL), 3762 ) 3763 ) 3764 3765 while True: 3766 constraint = self._parse_column_constraint() 3767 if not constraint: 3768 break 3769 constraints.append(constraint) 3770 3771 if not kind and not constraints: 3772 return this 3773 3774 return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints) 3775 3776 def _parse_auto_increment( 3777 self, 3778 ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint: 3779 start = None 3780 increment = None 3781 3782 if self._match(TokenType.L_PAREN, advance=False): 3783 args = self._parse_wrapped_csv(self._parse_bitwise) 3784 start = seq_get(args, 0) 3785 increment = seq_get(args, 1) 3786 elif self._match_text_seq("START"): 3787 start = self._parse_bitwise() 3788 self._match_text_seq("INCREMENT") 3789 increment = self._parse_bitwise() 3790 3791 if start and increment: 3792 return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment) 3793 3794 return exp.AutoIncrementColumnConstraint() 3795 3796 def _parse_compress(self) -> exp.CompressColumnConstraint: 3797 if self._match(TokenType.L_PAREN, advance=False): 3798 return self.expression( 3799 exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise) 3800 ) 3801 3802 return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise()) 
    def _parse_generated_as_identity(
        self,
    ) -> exp.GeneratedAsIdentityColumnConstraint | exp.ComputedColumnConstraint:
        """Parse a GENERATED {ALWAYS | BY DEFAULT} AS IDENTITY (...) constraint.

        `this=False` marks BY DEFAULT, `this=True` marks ALWAYS. Sequence options
        (START WITH, INCREMENT BY, MINVALUE, MAXVALUE, CYCLE) are set in place on
        the constraint. NOTE(review): the annotation mentions
        ComputedColumnConstraint, but this body only builds
        GeneratedAsIdentityColumnConstraint — presumably subclasses override.
        """
        if self._match_text_seq("BY", "DEFAULT"):
            on_null = self._match_pair(TokenType.ON, TokenType.NULL)
            this = self.expression(
                exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null
            )
        else:
            self._match_text_seq("ALWAYS")
            this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True)

        self._match(TokenType.ALIAS)
        identity = self._match_text_seq("IDENTITY")

        if self._match(TokenType.L_PAREN):
            if self._match(TokenType.START_WITH):
                this.set("start", self._parse_bitwise())
            if self._match_text_seq("INCREMENT", "BY"):
                this.set("increment", self._parse_bitwise())
            if self._match_text_seq("MINVALUE"):
                this.set("minvalue", self._parse_bitwise())
            if self._match_text_seq("MAXVALUE"):
                this.set("maxvalue", self._parse_bitwise())

            if self._match_text_seq("CYCLE"):
                this.set("cycle", True)
            elif self._match_text_seq("NO", "CYCLE"):
                this.set("cycle", False)

            if not identity:
                # GENERATED ALWAYS AS (<expr>) — the parens hold an expression,
                # not identity options.
                this.set("expression", self._parse_bitwise())

            self._match_r_paren()

        return this
self._match(TokenType.CONSTRAINT): 3858 this = self._parse_id_var() 3859 else: 3860 this = None 3861 3862 if self._match_texts(self.CONSTRAINT_PARSERS): 3863 return self.expression( 3864 exp.ColumnConstraint, 3865 this=this, 3866 kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self), 3867 ) 3868 3869 return this 3870 3871 def _parse_constraint(self) -> t.Optional[exp.Expression]: 3872 if not self._match(TokenType.CONSTRAINT): 3873 return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS) 3874 3875 this = self._parse_id_var() 3876 expressions = [] 3877 3878 while True: 3879 constraint = self._parse_unnamed_constraint() or self._parse_function() 3880 if not constraint: 3881 break 3882 expressions.append(constraint) 3883 3884 return self.expression(exp.Constraint, this=this, expressions=expressions) 3885 3886 def _parse_unnamed_constraint( 3887 self, constraints: t.Optional[t.Collection[str]] = None 3888 ) -> t.Optional[exp.Expression]: 3889 if not self._match_texts(constraints or self.CONSTRAINT_PARSERS): 3890 return None 3891 3892 constraint = self._prev.text.upper() 3893 if constraint not in self.CONSTRAINT_PARSERS: 3894 self.raise_error(f"No parser found for schema constraint {constraint}.") 3895 3896 return self.CONSTRAINT_PARSERS[constraint](self) 3897 3898 def _parse_unique(self) -> exp.UniqueColumnConstraint: 3899 self._match_text_seq("KEY") 3900 return self.expression( 3901 exp.UniqueColumnConstraint, 3902 this=self._parse_schema(self._parse_id_var(any_token=False)), 3903 index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text, 3904 ) 3905 3906 def _parse_key_constraint_options(self) -> t.List[str]: 3907 options = [] 3908 while True: 3909 if not self._curr: 3910 break 3911 3912 if self._match(TokenType.ON): 3913 action = None 3914 on = self._advance_any() and self._prev.text 3915 3916 if self._match_text_seq("NO", "ACTION"): 3917 action = "NO ACTION" 3918 elif self._match_text_seq("CASCADE"): 3919 
action = "CASCADE" 3920 elif self._match_text_seq("RESTRICT"): 3921 action = "RESTRICT" 3922 elif self._match_pair(TokenType.SET, TokenType.NULL): 3923 action = "SET NULL" 3924 elif self._match_pair(TokenType.SET, TokenType.DEFAULT): 3925 action = "SET DEFAULT" 3926 else: 3927 self.raise_error("Invalid key constraint") 3928 3929 options.append(f"ON {on} {action}") 3930 elif self._match_text_seq("NOT", "ENFORCED"): 3931 options.append("NOT ENFORCED") 3932 elif self._match_text_seq("DEFERRABLE"): 3933 options.append("DEFERRABLE") 3934 elif self._match_text_seq("INITIALLY", "DEFERRED"): 3935 options.append("INITIALLY DEFERRED") 3936 elif self._match_text_seq("NORELY"): 3937 options.append("NORELY") 3938 elif self._match_text_seq("MATCH", "FULL"): 3939 options.append("MATCH FULL") 3940 else: 3941 break 3942 3943 return options 3944 3945 def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]: 3946 if match and not self._match(TokenType.REFERENCES): 3947 return None 3948 3949 expressions = None 3950 this = self._parse_table(schema=True) 3951 options = self._parse_key_constraint_options() 3952 return self.expression(exp.Reference, this=this, expressions=expressions, options=options) 3953 3954 def _parse_foreign_key(self) -> exp.ForeignKey: 3955 expressions = self._parse_wrapped_id_vars() 3956 reference = self._parse_references() 3957 options = {} 3958 3959 while self._match(TokenType.ON): 3960 if not self._match_set((TokenType.DELETE, TokenType.UPDATE)): 3961 self.raise_error("Expected DELETE or UPDATE") 3962 3963 kind = self._prev.text.lower() 3964 3965 if self._match_text_seq("NO", "ACTION"): 3966 action = "NO ACTION" 3967 elif self._match(TokenType.SET): 3968 self._match_set((TokenType.NULL, TokenType.DEFAULT)) 3969 action = "SET " + self._prev.text.upper() 3970 else: 3971 self._advance() 3972 action = self._prev.text.upper() 3973 3974 options[kind] = action 3975 3976 return self.expression( 3977 exp.ForeignKey, expressions=expressions, 
            reference=reference, **options  # type: ignore
        )

    def _parse_primary_key_part(self) -> t.Optional[exp.Expression]:
        # One element of a PRIMARY KEY (...) column list; dialects may override this.
        return self._parse_field()

    def _parse_primary_key(
        self, wrapped_optional: bool = False, in_props: bool = False
    ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey:
        """Parse PRIMARY KEY either as a column-level constraint or a table-level key.

        Args:
            wrapped_optional: if True, the parenthesized column list may be omitted.
            in_props: True when parsing inside a properties clause, which forces the
                table-level (exp.PrimaryKey) form even without a following "(".
        """
        # Optional ASC/DESC after PRIMARY KEY; only DESC is recorded as a flag.
        desc = (
            self._match_set((TokenType.ASC, TokenType.DESC))
            and self._prev.token_type == TokenType.DESC
        )

        # No "(" ahead (peeked, not consumed) -> column-level constraint.
        if not in_props and not self._match(TokenType.L_PAREN, advance=False):
            return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc)

        expressions = self._parse_wrapped_csv(
            self._parse_primary_key_part, optional=wrapped_optional
        )
        options = self._parse_key_constraint_options()
        return self.expression(exp.PrimaryKey, expressions=expressions, options=options)

    def _parse_bracket(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        # Parses a trailing "[...]" (array literal / subscript / slice) or "{...}"
        # (DuckDB struct literal) and recurses to consume chained brackets.
        if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)):
            return this

        bracket_kind = self._prev.token_type

        if self._match(TokenType.COLON):
            # Leading ":" means an open-ended slice, e.g. x[:y].
            expressions: t.List[exp.Expression] = [
                self.expression(exp.Slice, expression=self._parse_conjunction())
            ]
        else:
            expressions = self._parse_csv(
                lambda: self._parse_slice(
                    self._parse_alias(self._parse_conjunction(), explicit=True)
                )
            )

        # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs
        if bracket_kind == TokenType.L_BRACE:
            this = self.expression(exp.Struct, expressions=expressions)
        elif not this or this.name.upper() == "ARRAY":
            # No base expression (or ARRAY[...]) -> array literal rather than a subscript.
            this = self.expression(exp.Array, expressions=expressions)
        else:
            # Subscript: normalize literal indexes by the dialect's INDEX_OFFSET.
            expressions = apply_index_offset(this, expressions, -self.INDEX_OFFSET)
            this = self.expression(exp.Bracket, this=this, expressions=expressions)

        if not self._match(TokenType.R_BRACKET) and bracket_kind == TokenType.L_BRACKET:
            self.raise_error("Expected ]")
        elif not self._match(TokenType.R_BRACE) and bracket_kind == TokenType.L_BRACE:
            self.raise_error("Expected }")

        self._add_comments(this)
        return self._parse_bracket(this)

    def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        # "a : b" inside brackets becomes a Slice node; anything else passes through.
        if self._match(TokenType.COLON):
            return self.expression(exp.Slice, this=this, expression=self._parse_conjunction())
        return this

    def _parse_case(self) -> t.Optional[exp.Expression]:
        # CASE [operand] WHEN ... THEN ... [ELSE ...] END; the leading conjunction is
        # the operand of a "simple" CASE and is None for a "searched" CASE.
        ifs = []
        default = None

        comments = self._prev_comments
        expression = self._parse_conjunction()

        while self._match(TokenType.WHEN):
            this = self._parse_conjunction()
            self._match(TokenType.THEN)
            then = self._parse_conjunction()
            ifs.append(self.expression(exp.If, this=this, true=then))

        if self._match(TokenType.ELSE):
            default = self._parse_conjunction()

        if not self._match(TokenType.END):
            self.raise_error("Expected END after CASE", self._prev)

        # CASE can be followed by an OVER clause, hence the _parse_window wrap.
        return self._parse_window(
            self.expression(exp.Case, comments=comments, this=expression, ifs=ifs, default=default)
        )

    def _parse_if(self) -> t.Optional[exp.Expression]:
        # Handles both the function form IF(cond, true[, false]) and the statement-ish
        # form IF cond THEN true [ELSE false] END.
        if self._match(TokenType.L_PAREN):
            args = self._parse_csv(self._parse_conjunction)
            this = self.validate_expression(exp.If.from_arg_list(args), args)
            self._match_r_paren()
        else:
            index = self._index - 1
            condition = self._parse_conjunction()

            if not condition:
                # Not an IF after all: rewind so the IF token can be re-interpreted.
                self._retreat(index)
                return None

            self._match(TokenType.THEN)
            true = self._parse_conjunction()
            false = self._parse_conjunction() if self._match(TokenType.ELSE) else None
            self._match(TokenType.END)
            this = self.expression(exp.If, this=condition, true=true, false=false)

        return self._parse_window(this)

    def _parse_next_value_for(self) -> t.Optional[exp.Expression]:
        # NEXT VALUE FOR <sequence> [OVER (ORDER BY ...)] (T-SQL sequence syntax).
        if not self._match_text_seq("VALUE", "FOR"):
            # NEXT was consumed by the caller; give it back if VALUE FOR doesn't follow.
            self._retreat(self._index - 1)
            return None

        return self.expression(
            exp.NextValueFor,
            this=self._parse_column(),
            order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order),
        )

    def _parse_extract(self) -> exp.Extract:
        # EXTRACT(part FROM expr) -- some dialects accept a comma instead of FROM.
        this = self._parse_function() or self._parse_var() or self._parse_type()

        if self._match(TokenType.FROM):
            return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

        if not self._match(TokenType.COMMA):
            self.raise_error("Expected FROM or comma after EXTRACT", self._prev)

        return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

    def _parse_any_value(self) -> exp.AnyValue:
        # ANY_VALUE(expr [HAVING MAX|MIN col]) -- the HAVING form is BigQuery syntax.
        this = self._parse_lambda()
        is_max = None
        having = None

        if self._match(TokenType.HAVING):
            self._match_texts(("MAX", "MIN"))
            is_max = self._prev.text == "MAX"
            having = self._parse_column()

        return self.expression(exp.AnyValue, this=this, having=having, max=is_max)

    def _parse_cast(self, strict: bool) -> exp.Expression:
        """Parse the interior of CAST(...) / TRY_CAST(...).

        Args:
            strict: True builds exp.Cast, False builds exp.TryCast.
        """
        this = self._parse_conjunction()

        if not self._match(TokenType.ALIAS):
            if self._match(TokenType.COMMA):
                # Two-argument form CAST(x, 'type string').
                return self.expression(exp.CastToStrType, this=this, to=self._parse_string())

            self.raise_error("Expected AS after CAST")

        fmt = None
        to = self._parse_types()

        if not to:
            self.raise_error("Expected TYPE after CAST")
        elif isinstance(to, exp.Identifier):
            # Unknown type name -> treat as a user-defined type.
            to = exp.DataType.build(to.name, udt=True)
        elif to.this == exp.DataType.Type.CHAR:
            if self._match(TokenType.CHARACTER_SET):
                to = self.expression(exp.CharacterSet, this=self._parse_var_or_string())
        elif self._match(TokenType.FORMAT):
            # CAST(x AS type FORMAT 'fmt' [AT TIME ZONE tz]) (Oracle/Teradata-style).
            fmt_string = self._parse_string()
            fmt = self._parse_at_time_zone(fmt_string)

            if to.this in exp.DataType.TEMPORAL_TYPES:
                # Rewrite a formatted temporal cast into StrToDate/StrToTime with the
                # format translated through the dialect's time-format mappings.
                this = self.expression(
                    exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime,
                    this=this,
                    format=exp.Literal.string(
                        format_time(
                            fmt_string.this if fmt_string else "",
                            self.FORMAT_MAPPING or self.TIME_MAPPING,
                            self.FORMAT_TRIE or self.TIME_TRIE,
                        )
                    ),
                )

                if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime):
                    this.set("zone", fmt.args["zone"])

                return this

        return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, format=fmt)

    def _parse_concat(self) -> t.Optional[exp.Expression]:
        args = self._parse_csv(self._parse_conjunction)
        if self.CONCAT_NULL_OUTPUTS_STRING:
            args = self._ensure_string_if_null(args)

        # Some dialects (e.g. Trino) don't allow a single-argument CONCAT call, so when
        # we find such a call we replace it with its argument.
        if len(args) == 1:
            return args[0]

        return self.expression(
            exp.Concat if self.STRICT_STRING_CONCAT else exp.SafeConcat, expressions=args
        )

    def _parse_concat_ws(self) -> t.Optional[exp.Expression]:
        # CONCAT_WS(delimiter, value, ...) -- the first argument is the separator.
        args = self._parse_csv(self._parse_conjunction)
        if len(args) < 2:
            return self.expression(exp.ConcatWs, expressions=args)
        delim, *values = args
        if self.CONCAT_NULL_OUTPUTS_STRING:
            values = self._ensure_string_if_null(values)

        return self.expression(exp.ConcatWs, expressions=[delim] + values)

    def _parse_string_agg(self) -> exp.Expression:
        # STRING_AGG / GROUP_CONCAT, covering both the inline ORDER BY style and the
        # WITHIN GROUP (ORDER BY ...) style.
        if self._match(TokenType.DISTINCT):
            args: t.List[t.Optional[exp.Expression]] = [
                self.expression(exp.Distinct, expressions=[self._parse_conjunction()])
            ]
            if self._match(TokenType.COMMA):
                args.extend(self._parse_csv(self._parse_conjunction))
        else:
            args = self._parse_csv(self._parse_conjunction)  # type: ignore

        index = self._index
        if not self._match(TokenType.R_PAREN) and args:
            # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]])
            # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n])
            args[-1] = self._parse_limit(this=self._parse_order(this=args[-1]))
            return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1))

        # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]).
        # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that
        # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them.
        if not self._match_text_seq("WITHIN", "GROUP"):
            self._retreat(index)
            return self.validate_expression(exp.GroupConcat.from_arg_list(args), args)

        self._match_l_paren()  # The corresponding match_r_paren will be called in parse_function (caller)
        order = self._parse_order(this=seq_get(args, 0))
        return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1))

    def _parse_convert(self, strict: bool) -> t.Optional[exp.Expression]:
        # CONVERT(expr USING charset) or CONVERT(expr, type); built as a (Try)Cast.
        this = self._parse_bitwise()

        if self._match(TokenType.USING):
            to: t.Optional[exp.Expression] = self.expression(
                exp.CharacterSet, this=self._parse_var()
            )
        elif self._match(TokenType.COMMA):
            to = self._parse_types()
        else:
            to = None

        return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to)

    def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]:
        """
        There are generally two variants of the DECODE function:

        - DECODE(bin, charset)
        - DECODE(expression, search, result [, search, result] ... [, default])

        The second variant will always be parsed into a CASE expression. Note that NULL
        needs special treatment, since we need to explicitly check for it with `IS NULL`,
        instead of relying on pattern matching.
        """
        args = self._parse_csv(self._parse_conjunction)

        if len(args) < 3:
            return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1))

        expression, *expressions = args
        if not expression:
            return None

        ifs = []
        # Pair up (search, result); a trailing unpaired argument is the default.
        for search, result in zip(expressions[::2], expressions[1::2]):
            if not search or not result:
                return None

            if isinstance(search, exp.Literal):
                ifs.append(
                    exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result)
                )
            elif isinstance(search, exp.Null):
                ifs.append(
                    exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result)
                )
            else:
                # Non-literal search: DECODE treats NULL = NULL as a match, so OR in an
                # explicit both-NULL check.
                cond = exp.or_(
                    exp.EQ(this=expression.copy(), expression=search),
                    exp.and_(
                        exp.Is(this=expression.copy(), expression=exp.Null()),
                        exp.Is(this=search.copy(), expression=exp.Null()),
                        copy=False,
                    ),
                    copy=False,
                )
                ifs.append(exp.If(this=cond, true=result))

        return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None)

    def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]:
        # [KEY] key {: | ,} [VALUE] value -- the keyword/punctuation variants are optional.
        self._match_text_seq("KEY")
        key = self._parse_column()
        self._match_set((TokenType.COLON, TokenType.COMMA))
        self._match_text_seq("VALUE")
        value = self._parse_bitwise()

        if not key and not value:
            return None
        return self.expression(exp.JSONKeyValue, this=key, expression=value)

    def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        # Wraps `this` when followed by a FORMAT JSON modifier.
        if not this or not self._match_text_seq("FORMAT", "JSON"):
            return this

        return self.expression(exp.FormatJson, this=this)

    def _parse_on_handling(self, on: str, *values: str) -> t.Optional[str]:
        # Parses the "X ON Y" syntax, i.e. NULL ON NULL (Oracle, T-SQL)
        for value in values:
            if self._match_text_seq(value, "ON", on):
                return f"{value} ON {on}"

        return None

    def _parse_json_object(self) -> exp.JSONObject:
        # JSON_OBJECT(* | key/value csv [NULL|ABSENT ON NULL] [WITH|WITHOUT UNIQUE KEYS]
        # [RETURNING type [FORMAT JSON]] [ENCODING enc]).
        star = self._parse_star()
        expressions = (
            [star]
            if star
            else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value()))
        )
        null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT")

        unique_keys = None
        if self._match_text_seq("WITH", "UNIQUE"):
            unique_keys = True
        elif self._match_text_seq("WITHOUT", "UNIQUE"):
            unique_keys = False

        self._match_text_seq("KEYS")

        return_type = self._match_text_seq("RETURNING") and self._parse_format_json(
            self._parse_type()
        )
        encoding = self._match_text_seq("ENCODING") and self._parse_var()

        return self.expression(
            exp.JSONObject,
            expressions=expressions,
            null_handling=null_handling,
            unique_keys=unique_keys,
            return_type=return_type,
            encoding=encoding,
        )

    def _parse_logarithm(self) -> exp.Func:
        # Default argument order is base, expression
        args = self._parse_csv(self._parse_range)

        if len(args) > 1:
            if not self.LOG_BASE_FIRST:
                args.reverse()
            return exp.Log.from_arg_list(args)

        # Single argument: dialect decides whether LOG means LN or LOG.
        return self.expression(
            exp.Ln if self.LOG_DEFAULTS_TO_LN else exp.Log, this=seq_get(args, 0)
        )

    def _parse_match_against(self) -> exp.MatchAgainst:
        # MySQL full-text search: MATCH (col, ...) AGAINST ('expr' [modifier]).
        expressions = self._parse_csv(self._parse_column)

        self._match_text_seq(")", "AGAINST", "(")

        this = self._parse_string()

        if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"):
            modifier = "IN NATURAL LANGUAGE MODE"
            if self._match_text_seq("WITH", "QUERY", "EXPANSION"):
                modifier = f"{modifier} WITH QUERY EXPANSION"
        elif self._match_text_seq("IN", "BOOLEAN", "MODE"):
            modifier = "IN BOOLEAN MODE"
        elif self._match_text_seq("WITH", "QUERY", "EXPANSION"):
            modifier = "WITH QUERY EXPANSION"
        else:
            modifier = None

        return self.expression(
            exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier
        )

    # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16
    def _parse_open_json(self) -> exp.OpenJSON:
        this = self._parse_bitwise()
        path = self._match(TokenType.COMMA) and self._parse_string()

        def _parse_open_json_column_def() -> exp.OpenJSONColumnDef:
            # One entry of the WITH (...) column list: name type ['path'] [AS JSON].
            this = self._parse_field(any_token=True)
            kind = self._parse_types()
            path = self._parse_string()
            as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON)

            return self.expression(
                exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json
            )

        expressions = None
        if self._match_pair(TokenType.R_PAREN, TokenType.WITH):
            self._match_l_paren()
            expressions = self._parse_csv(_parse_open_json_column_def)

        return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions)

    def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition:
        # POSITION(needle IN haystack) or POSITION/LOCATE(arg1, arg2[, pos]);
        # haystack_first controls the argument order of the comma form.
        args = self._parse_csv(self._parse_bitwise)

        if self._match(TokenType.IN):
            return self.expression(
                exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0)
            )

        if haystack_first:
            haystack = seq_get(args, 0)
            needle = seq_get(args, 1)
        else:
            needle = seq_get(args, 0)
            haystack = seq_get(args, 1)

        return self.expression(
            exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2)
        )

    def _parse_join_hint(self, func_name: str) -> exp.JoinHint:
        # e.g. BROADCAST(t1, t2) inside a hint comment.
        args = self._parse_csv(self._parse_table)
        return exp.JoinHint(this=func_name.upper(), expressions=args)

    def _parse_substring(self) -> exp.Substring:
        # Postgres supports the form: substring(string [from int] [for int])
        # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6

        args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise))

        if self._match(TokenType.FROM):
            args.append(self._parse_bitwise())
            if self._match(TokenType.FOR):
                args.append(self._parse_bitwise())

        return self.validate_expression(exp.Substring.from_arg_list(args), args)

    def _parse_trim(self) -> exp.Trim:
        # https://www.w3resource.com/sql/character-functions/trim.php
        # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html

        position = None
        collation = None
        expression = None

        if self._match_texts(self.TRIM_TYPES):
            position = self._prev.text.upper()

        this = self._parse_bitwise()
        if self._match_set((TokenType.FROM, TokenType.COMMA)):
            # TRIM(chars FROM str) lists the removal set first; swap to keep `this`
            # as the trimmed string (dialect flag TRIM_PATTERN_FIRST forces the swap too).
            invert_order = self._prev.token_type == TokenType.FROM or self.TRIM_PATTERN_FIRST
            expression = self._parse_bitwise()

            if invert_order:
                this, expression = expression, this

        if self._match(TokenType.COLLATE):
            collation = self._parse_bitwise()

        return self.expression(
            exp.Trim, this=this, position=position, expression=expression, collation=collation
        )

    def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]:
        # WINDOW name AS (...) [, ...] at the end of a SELECT.
        return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window)

    def _parse_named_window(self) -> t.Optional[exp.Expression]:
        return self._parse_window(self._parse_id_var(), alias=True)

    def _parse_respect_or_ignore_nulls(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        # Optional IGNORE NULLS / RESPECT NULLS wrapper.
        if self._match_text_seq("IGNORE", "NULLS"):
            return self.expression(exp.IgnoreNulls, this=this)
        if self._match_text_seq("RESPECT", "NULLS"):
            return self.expression(exp.RespectNulls, this=this)
        return this

    def _parse_window(
        self, this: t.Optional[exp.Expression], alias: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse the window-function suffix of `this`: FILTER, WITHIN GROUP,
        IGNORE/RESPECT NULLS and OVER (...); `alias=True` parses a named-window
        definition (name AS (...)) instead of an OVER clause."""
        if self._match_pair(TokenType.FILTER, TokenType.L_PAREN):
            self._match(TokenType.WHERE)
            this = self.expression(
                exp.Filter, this=this, expression=self._parse_where(skip_where_token=True)
            )
            self._match_r_paren()

        # T-SQL allows the OVER (...) syntax after WITHIN GROUP.
        # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16
        if self._match_text_seq("WITHIN", "GROUP"):
            order = self._parse_wrapped(self._parse_order)
            this = self.expression(exp.WithinGroup, this=this, expression=order)

        # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER
        # Some dialects choose to implement and some do not.
        # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html

        # There is some code above in _parse_lambda that handles
        # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ...

        # The below changes handle
        # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ...

        # Oracle allows both formats
        # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html)
        # and Snowflake chose to do the same for familiarity
        # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes
        this = self._parse_respect_or_ignore_nulls(this)

        # bigquery select from window x AS (partition by ...)
        if alias:
            over = None
            self._match(TokenType.ALIAS)
        elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS):
            return this
        else:
            over = self._prev.text.upper()

        if not self._match(TokenType.L_PAREN):
            # OVER window_name (no parenthesized spec).
            return self.expression(
                exp.Window, this=this, alias=self._parse_id_var(False), over=over
            )

        window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS)

        first = self._match(TokenType.FIRST)
        if self._match_text_seq("LAST"):
            first = False

        partition, order = self._parse_partition_and_order()
        kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text

        if kind:
            self._match(TokenType.BETWEEN)
            start = self._parse_window_spec()
            self._match(TokenType.AND)
            end = self._parse_window_spec()

            spec = self.expression(
                exp.WindowSpec,
                kind=kind,
                start=start["value"],
                start_side=start["side"],
                end=end["value"],
                end_side=end["side"],
            )
        else:
            spec = None

        self._match_r_paren()

        window = self.expression(
            exp.Window,
            this=this,
            partition_by=partition,
            order=order,
            spec=spec,
            alias=window_alias,
            over=over,
            first=first,
        )

        # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...)
        if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False):
            return self._parse_window(window, alias=alias)

        return window

    def _parse_partition_and_order(
        self,
    ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]:
        return self._parse_partition_by(), self._parse_order()

    def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]:
        # One frame endpoint: UNBOUNDED | CURRENT ROW | <expr>, plus PRECEDING/FOLLOWING.
        self._match(TokenType.BETWEEN)

        return {
            "value": (
                (self._match_text_seq("UNBOUNDED") and "UNBOUNDED")
                or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW")
                or self._parse_bitwise()
            ),
            "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text,
        }

    def _parse_alias(
        self, this: t.Optional[exp.Expression], explicit: bool = False
    ) -> t.Optional[exp.Expression]:
        # expr [AS] alias, or expr (a, b, ...) for multi-aliases; `explicit=True`
        # requires the AS keyword.
        any_token = self._match(TokenType.ALIAS)

        if explicit and not any_token:
            return this

        if self._match(TokenType.L_PAREN):
            aliases = self.expression(
                exp.Aliases,
                this=this,
                expressions=self._parse_csv(lambda: self._parse_id_var(any_token)),
            )
            self._match_r_paren(aliases)
            return aliases

        alias = self._parse_id_var(any_token)

        if alias:
            return self.expression(exp.Alias, this=this, alias=alias)

        return this

    def _parse_id_var(
        self,
        any_token: bool = True,
        tokens: t.Optional[t.Collection[TokenType]] = None,
    ) -> t.Optional[exp.Expression]:
        # An identifier, or any non-reserved token coerced into one.
        identifier = self._parse_identifier()

        if identifier:
            return identifier

        if (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS):
            quoted = self._prev.token_type == TokenType.STRING
            return exp.Identifier(this=self._prev.text, quoted=quoted)

        return None

    def _parse_string(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.STRING):
            return self.PRIMARY_PARSERS[TokenType.STRING](self, self._prev)
        return self._parse_placeholder()

    def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]:
        return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True)

    def _parse_number(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.NUMBER):
            return self.PRIMARY_PARSERS[TokenType.NUMBER](self, self._prev)
        return self._parse_placeholder()

    def _parse_identifier(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.IDENTIFIER):
            return self.expression(exp.Identifier, this=self._prev.text, quoted=True)
        return self._parse_placeholder()

    def _parse_var(
        self, any_token: bool = False, tokens: t.Optional[t.Collection[TokenType]] = None
    ) -> t.Optional[exp.Expression]:
        if (
            (any_token and self._advance_any())
            or self._match(TokenType.VAR)
            or (self._match_set(tokens) if tokens else False)
        ):
            return self.expression(exp.Var, this=self._prev.text)
        return self._parse_placeholder()

    def _advance_any(self) -> t.Optional[Token]:
        # Consume the current token unless it's reserved; returns the consumed token.
        if self._curr and self._curr.token_type not in self.RESERVED_KEYWORDS:
            self._advance()
            return self._prev
        return None

    def _parse_var_or_string(self) -> t.Optional[exp.Expression]:
        return self._parse_var() or self._parse_string()

    def _parse_null(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.NULL_TOKENS):
            return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev)
        return self._parse_placeholder()

    def _parse_boolean(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.TRUE):
            return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev)
        if self._match(TokenType.FALSE):
            return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev)
        return self._parse_placeholder()

    def _parse_star(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.STAR):
            return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev)
        return self._parse_placeholder()

    def _parse_parameter(self) -> exp.Parameter:
        # @name or {name} style parameter; `wrapped` records the brace form.
        wrapped = self._match(TokenType.L_BRACE)
        this = self._parse_var() or self._parse_identifier() or self._parse_primary()
        self._match(TokenType.R_BRACE)
        return self.expression(exp.Parameter, this=this, wrapped=wrapped)

    def _parse_placeholder(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.PLACEHOLDER_PARSERS):
            placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self)
            if placeholder:
                return placeholder
            # The sub-parser declined; give the token back.
            self._advance(-1)
        return None

    def _parse_except(self) -> t.Optional[t.List[exp.Expression]]:
        # SELECT * EXCEPT (col, ...) or EXCEPT col.
        if not self._match(TokenType.EXCEPT):
            return None
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_column)

        except_column = self._parse_column()
        return [except_column] if except_column else None

    def _parse_replace(self) -> t.Optional[t.List[exp.Expression]]:
        # SELECT * REPLACE (expr AS col, ...) or REPLACE expr.
        if not self._match(TokenType.REPLACE):
            return None
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_expression)

        replace_expression = self._parse_expression()
        return [replace_expression] if replace_expression else None

    def _parse_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA
    ) -> t.List[exp.Expression]:
        # Parse a `sep`-separated list, dropping items that parse to None.
        parse_result = parse_method()
        items = [parse_result] if parse_result is not None else []

        while self._match(sep):
            self._add_comments(parse_result)
            parse_result = parse_method()
            if parse_result is not None:
                items.append(parse_result)

        return items

    def _parse_tokens(
        self, parse_method: t.Callable, expressions: t.Dict
    ) -> t.Optional[exp.Expression]:
        # Left-associative binary operator parsing driven by a token->node mapping.
        this = parse_method()

        while self._match_set(expressions):
            this = self.expression(
                expressions[self._prev.token_type],
                this=this,
                comments=self._prev_comments,
                expression=parse_method(),
            )

        return this

    def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]:
        return self._parse_wrapped_csv(self._parse_id_var, optional=optional)

    def _parse_wrapped_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False
    ) -> t.List[exp.Expression]:
        return self._parse_wrapped(
            lambda: self._parse_csv(parse_method, sep=sep), optional=optional
        )

    def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any:
        # Run `parse_method` inside (...); parentheses are required unless `optional`.
        wrapped = self._match(TokenType.L_PAREN)
        if not wrapped and not optional:
            self.raise_error("Expecting (")
        parse_result = parse_method()
        if wrapped:
            self._match_r_paren()
        return parse_result

    def _parse_expressions(self) -> t.List[exp.Expression]:
        return self._parse_csv(self._parse_expression)

    def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]:
        return self._parse_select() or self._parse_set_operations(
            self._parse_expression() if alias else self._parse_conjunction()
        )

    def _parse_ddl_select(self) -> t.Optional[exp.Expression]:
        # The SELECT body of a CREATE ... AS statement.
        return self._parse_query_modifiers(
            self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False))
        )

    def _parse_transaction(self) -> exp.Transaction | exp.Command:
        # BEGIN/START [kind] [TRANSACTION|WORK] [mode, ...].
        this = None
        if self._match_texts(self.TRANSACTION_KIND):
            this = self._prev.text

        self._match_texts({"TRANSACTION", "WORK"})

        modes = []
        while True:
            mode = []
            # A single mode may be several words (VAR tokens), e.g. READ ONLY.
            while self._match(TokenType.VAR):
                mode.append(self._prev.text)

            if mode:
                modes.append(" ".join(mode))
            if not self._match(TokenType.COMMA):
                break

        return self.expression(exp.Transaction, this=this, modes=modes)

    def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback:
        # COMMIT/ROLLBACK [TRANSACTION|WORK] [TO [SAVEPOINT] name] [AND [NO] CHAIN].
        chain = None
        savepoint = None
        is_rollback = self._prev.token_type == TokenType.ROLLBACK

        self._match_texts({"TRANSACTION", "WORK"})

        if self._match_text_seq("TO"):
            self._match_text_seq("SAVEPOINT")
            savepoint = self._parse_id_var()

        if self._match(TokenType.AND):
            chain = not self._match_text_seq("NO")
            self._match_text_seq("CHAIN")

        # Note: only COMMIT carries the chain flag; ROLLBACK keeps just the savepoint.
        if is_rollback:
            return self.expression(exp.Rollback, savepoint=savepoint)

        return self.expression(exp.Commit, chain=chain)

    def _parse_add_column(self) -> t.Optional[exp.Expression]:
        # ALTER TABLE ... ADD [COLUMN] [IF NOT EXISTS] <column def> [FIRST | AFTER col].
        if not self._match_text_seq("ADD"):
            return None

        self._match(TokenType.COLUMN)
        exists_column = self._parse_exists(not_=True)
        expression = self._parse_field_def()

        if expression:
            expression.set("exists", exists_column)

            # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns
            if self._match_texts(("FIRST", "AFTER")):
                position = self._prev.text
                column_position = self.expression(
                    exp.ColumnPosition, this=self._parse_column(), position=position
                )
                expression.set("position", column_position)

        return expression

    def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]:
        drop = self._match(TokenType.DROP) and self._parse_drop()
        if drop and not isinstance(drop, exp.Command):
            drop.set("kind", drop.args.get("kind", "COLUMN"))
        return drop

    # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html
    def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition:
        return self.expression(
            exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists
        )

    def _parse_add_constraint(self) -> exp.AddConstraint:
        # ALTER TABLE ... ADD [CONSTRAINT name] {CHECK (...) | FOREIGN KEY ... | PRIMARY KEY ...}.
        this = None
        kind = self._prev.token_type

        if kind == TokenType.CONSTRAINT:
            this = self._parse_id_var()

            if self._match_text_seq("CHECK"):
                expression = self._parse_wrapped(self._parse_conjunction)
                enforced = self._match_text_seq("ENFORCED")

                return self.expression(
                    exp.AddConstraint, this=this, expression=expression, enforced=enforced
                )

        if kind == TokenType.FOREIGN_KEY or self._match(TokenType.FOREIGN_KEY):
            expression = self._parse_foreign_key()
        elif kind == TokenType.PRIMARY_KEY or self._match(TokenType.PRIMARY_KEY):
            expression = self._parse_primary_key()
        else:
            expression = None

        return self.expression(exp.AddConstraint, this=this, expression=expression)

    def _parse_alter_table_add(self) -> t.List[exp.Expression]:
        index = self._index - 1

        if self._match_set(self.ADD_CONSTRAINT_TOKENS):
            return self._parse_csv(self._parse_add_constraint)

        self._retreat(index)
        if not self.ALTER_TABLE_ADD_COLUMN_KEYWORD and self._match_text_seq("ADD"):
            # Dialects where ADD is followed directly by column defs (no COLUMN keyword).
            return self._parse_csv(self._parse_field_def)

        return self._parse_csv(self._parse_add_column)

    def _parse_alter_table_alter(self) -> exp.AlterColumn:
        # ALTER [COLUMN] col {DROP DEFAULT | SET DEFAULT expr | [SET DATA] TYPE ...}.
        self._match(TokenType.COLUMN)
        column = self._parse_field(any_token=True)

        if self._match_pair(TokenType.DROP, TokenType.DEFAULT):
            return self.expression(exp.AlterColumn, this=column, drop=True)
        if self._match_pair(TokenType.SET, TokenType.DEFAULT):
            return self.expression(exp.AlterColumn, this=column, default=self._parse_conjunction())

        self._match_text_seq("SET", "DATA")
        return self.expression(
            exp.AlterColumn,
            this=column,
            dtype=self._match_text_seq("TYPE") and self._parse_types(),
            collate=self._match(TokenType.COLLATE) and self._parse_term(),
            using=self._match(TokenType.USING) and self._parse_conjunction(),
        )

    def _parse_alter_table_drop(self) -> t.List[exp.Expression]:
        index = self._index - 1

        partition_exists = self._parse_exists()
        if self._match(TokenType.PARTITION, advance=False):
            return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists))

        self._retreat(index)
        return self._parse_csv(self._parse_drop_column)

    def _parse_alter_table_rename(self) -> exp.RenameTable:
        self._match_text_seq("TO")
        return self.expression(exp.RenameTable, this=self._parse_table(schema=True))

    def _parse_alter(self) -> exp.AlterTable | exp.Command:
        # Dispatch on the keyword after the table name via ALTER_PARSERS; anything
        # not fully consumed falls back to an opaque exp.Command.
        start = self._prev

        if not self._match(TokenType.TABLE):
            return self._parse_as_command(start)

        exists = self._parse_exists()
        only = self._match_text_seq("ONLY")
        this = self._parse_table(schema=True)

        if self._next:
            self._advance()

        parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None
        if parser:
            actions = ensure_list(parser(self))

            if not self._curr:
                return self.expression(
                    exp.AlterTable,
                    this=this,
                    exists=exists,
                    actions=actions,
                    only=only,
                )

        return self._parse_as_command(start)

    def _parse_merge(self) -> exp.Merge:
        # MERGE INTO target USING source ON cond WHEN [NOT] MATCHED [BY TARGET|SOURCE]
        # [AND cond] THEN {INSERT ... | UPDATE ... | DELETE}.
        self._match(TokenType.INTO)
        target = self._parse_table()

        if target and self._match(TokenType.ALIAS, advance=False):
            target.set("alias", self._parse_table_alias())

        self._match(TokenType.USING)
        using = self._parse_table()

        self._match(TokenType.ON)
        on = self._parse_conjunction()

        whens = []
        while self._match(TokenType.WHEN):
            matched = not self._match(TokenType.NOT)
            self._match_text_seq("MATCHED")
            source = (
                False
                if self._match_text_seq("BY", "TARGET")
                else self._match_text_seq("BY", "SOURCE")
            )
            condition = self._parse_conjunction() if self._match(TokenType.AND) else None

            self._match(TokenType.THEN)

            if self._match(TokenType.INSERT):
                _this = self._parse_star()
                if _this:
                    then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=_this)
                else:
                    then = self.expression(
                        exp.Insert,
                        this=self._parse_value(),
                        expression=self._match(TokenType.VALUES) and self._parse_value(),
                    )
            elif self._match(TokenType.UPDATE):
                expressions = self._parse_star()
                if expressions:
                    then = self.expression(exp.Update, expressions=expressions)
                else:
                    then = self.expression(
                        exp.Update,
                        expressions=self._match(TokenType.SET)
                        and self._parse_csv(self._parse_equality),
                    )
            elif self._match(TokenType.DELETE):
                then = self.expression(exp.Var, this=self._prev.text)
            else:
                then = None

            whens.append(
                self.expression(
                    exp.When,
                    matched=matched,
                    source=source,
                    condition=condition,
                    then=then,
                )
            )

        return self.expression(
            exp.Merge,
            this=target,
            using=using,
            on=on,
            expressions=whens,
        )

    def _parse_show(self) -> t.Optional[exp.Expression]:
        # Dialect-specific SHOW variants via the SHOW trie; otherwise a raw Command.
        parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE)
        if parser:
            return parser(self)
        return self._parse_as_command(self._prev)

    def _parse_set_item_assignment(
        self, kind: t.Optional[str] = None
    ) -> t.Optional[exp.Expression]:
        # SET [GLOBAL|SESSION] name {= | TO} value, built as an EQ inside a SetItem.
        index = self._index

        if kind in {"GLOBAL", "SESSION"} and self._match_text_seq("TRANSACTION"):
            return self._parse_set_transaction(global_=kind == "GLOBAL")

        left = self._parse_primary() or self._parse_id_var()
        assignment_delimiter = self._match_texts(("=", "TO"))

        if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter):
            # Not an assignment after all; rewind so the caller can fall back.
            self._retreat(index)
            return None

        right = self._parse_statement() or self._parse_id_var()
        this = self.expression(exp.EQ, this=left, expression=right)

        return self.expression(exp.SetItem, this=this, kind=kind)

    def _parse_set_transaction(self, global_: bool = False) -> exp.Expression:
        # SET [GLOBAL] TRANSACTION <characteristic, ...>.
        self._match_text_seq("TRANSACTION")
        characteristics = self._parse_csv(
            lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS)
        )
        return self.expression(
            exp.SetItem,
            expressions=characteristics,
            kind="TRANSACTION",
            **{"global": global_},  # type: ignore
        )

    def _parse_set_item(self) -> t.Optional[exp.Expression]:
        parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE)
        return parser(self) if parser else self._parse_set_item_assignment(kind=None)

    def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command:
        index = self._index
        set_ = self.expression(
            exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag
        )

        if self._curr:
            # Leftover tokens mean we couldn't fully parse it; emit a raw Command.
            self._retreat(index)
            return self._parse_as_command(self._prev)

        return set_

    def _parse_var_from_options(self, options: t.Collection[str]) -> t.Optional[exp.Var]:
        # Match one of the (possibly multi-word) option strings and return it as a Var.
        for option in options:
            if self._match_text_seq(*option.split(" ")):
                return exp.var(option)
        return None

    def _parse_as_command(self, start: Token) -> exp.Command:
        # Consume everything to the end and store the raw SQL: the first token's text
        # becomes the command name, the remainder its expression.
        while self._curr:
            self._advance()
        text = self._find_sql(start, self._prev)
        size = len(start.text)
        return exp.Command(this=text[:size], expression=text[size:])

    def _parse_dict_property(self, this: str) -> exp.DictProperty:
        settings = []

        self._match_l_paren()
        kind = self._parse_id_var()

        if self._match(TokenType.L_PAREN):
            while True:
                key = self._parse_id_var()
                value = self._parse_primary()

                if not key and value is None:
                    break
settings.append(self.expression(exp.DictSubProperty, this=key, value=value)) 5078 self._match(TokenType.R_PAREN) 5079 5080 self._match_r_paren() 5081 5082 return self.expression( 5083 exp.DictProperty, 5084 this=this, 5085 kind=kind.this if kind else None, 5086 settings=settings, 5087 ) 5088 5089 def _parse_dict_range(self, this: str) -> exp.DictRange: 5090 self._match_l_paren() 5091 has_min = self._match_text_seq("MIN") 5092 if has_min: 5093 min = self._parse_var() or self._parse_primary() 5094 self._match_text_seq("MAX") 5095 max = self._parse_var() or self._parse_primary() 5096 else: 5097 max = self._parse_var() or self._parse_primary() 5098 min = exp.Literal.number(0) 5099 self._match_r_paren() 5100 return self.expression(exp.DictRange, this=this, min=min, max=max) 5101 5102 def _parse_comprehension(self, this: exp.Expression) -> t.Optional[exp.Comprehension]: 5103 index = self._index 5104 expression = self._parse_column() 5105 if not self._match(TokenType.IN): 5106 self._retreat(index - 1) 5107 return None 5108 iterator = self._parse_column() 5109 condition = self._parse_conjunction() if self._match_text_seq("IF") else None 5110 return self.expression( 5111 exp.Comprehension, 5112 this=this, 5113 expression=expression, 5114 iterator=iterator, 5115 condition=condition, 5116 ) 5117 5118 def _find_parser( 5119 self, parsers: t.Dict[str, t.Callable], trie: t.Dict 5120 ) -> t.Optional[t.Callable]: 5121 if not self._curr: 5122 return None 5123 5124 index = self._index 5125 this = [] 5126 while True: 5127 # The current token might be multiple words 5128 curr = self._curr.text.upper() 5129 key = curr.split(" ") 5130 this.append(curr) 5131 5132 self._advance() 5133 result, trie = in_trie(trie, key) 5134 if result == TrieResult.FAILED: 5135 break 5136 5137 if result == TrieResult.EXISTS: 5138 subparser = parsers[" ".join(this)] 5139 return subparser 5140 5141 self._retreat(index) 5142 return None 5143 5144 def _match(self, token_type, advance=True, expression=None): 
5145 if not self._curr: 5146 return None 5147 5148 if self._curr.token_type == token_type: 5149 if advance: 5150 self._advance() 5151 self._add_comments(expression) 5152 return True 5153 5154 return None 5155 5156 def _match_set(self, types, advance=True): 5157 if not self._curr: 5158 return None 5159 5160 if self._curr.token_type in types: 5161 if advance: 5162 self._advance() 5163 return True 5164 5165 return None 5166 5167 def _match_pair(self, token_type_a, token_type_b, advance=True): 5168 if not self._curr or not self._next: 5169 return None 5170 5171 if self._curr.token_type == token_type_a and self._next.token_type == token_type_b: 5172 if advance: 5173 self._advance(2) 5174 return True 5175 5176 return None 5177 5178 def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 5179 if not self._match(TokenType.L_PAREN, expression=expression): 5180 self.raise_error("Expecting (") 5181 5182 def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 5183 if not self._match(TokenType.R_PAREN, expression=expression): 5184 self.raise_error("Expecting )") 5185 5186 def _match_texts(self, texts, advance=True): 5187 if self._curr and self._curr.text.upper() in texts: 5188 if advance: 5189 self._advance() 5190 return True 5191 return False 5192 5193 def _match_text_seq(self, *texts, advance=True): 5194 index = self._index 5195 for text in texts: 5196 if self._curr and self._curr.text.upper() == text: 5197 self._advance() 5198 else: 5199 self._retreat(index) 5200 return False 5201 5202 if not advance: 5203 self._retreat(index) 5204 5205 return True 5206 5207 @t.overload 5208 def _replace_columns_with_dots(self, this: exp.Expression) -> exp.Expression: 5209 ... 5210 5211 @t.overload 5212 def _replace_columns_with_dots( 5213 self, this: t.Optional[exp.Expression] 5214 ) -> t.Optional[exp.Expression]: 5215 ... 
5216 5217 def _replace_columns_with_dots(self, this): 5218 if isinstance(this, exp.Dot): 5219 exp.replace_children(this, self._replace_columns_with_dots) 5220 elif isinstance(this, exp.Column): 5221 exp.replace_children(this, self._replace_columns_with_dots) 5222 table = this.args.get("table") 5223 this = ( 5224 self.expression(exp.Dot, this=table, expression=this.this) if table else this.this 5225 ) 5226 5227 return this 5228 5229 def _replace_lambda( 5230 self, node: t.Optional[exp.Expression], lambda_variables: t.Set[str] 5231 ) -> t.Optional[exp.Expression]: 5232 if not node: 5233 return node 5234 5235 for column in node.find_all(exp.Column): 5236 if column.parts[0].name in lambda_variables: 5237 dot_or_id = column.to_dot() if column.table else column.this 5238 parent = column.parent 5239 5240 while isinstance(parent, exp.Dot): 5241 if not isinstance(parent.parent, exp.Dot): 5242 parent.replace(dot_or_id) 5243 break 5244 parent = parent.parent 5245 else: 5246 if column is node: 5247 node = dot_or_id 5248 else: 5249 column.replace(dot_or_id) 5250 return node 5251 5252 def _ensure_string_if_null(self, values: t.List[exp.Expression]) -> t.List[exp.Expression]: 5253 return [ 5254 exp.func("COALESCE", exp.cast(value, "text"), exp.Literal.string("")) 5255 for value in values 5256 if value 5257 ]
def parse_var_map(args: t.List) -> exp.StarMap | exp.VarMap:
    """Build a VarMap from a flat [key1, value1, key2, value2, ...] argument list.

    A single star argument produces a StarMap instead. Assumes an even number
    of arguments otherwise; an odd count raises IndexError on the missing value.
    """
    if len(args) == 1 and args[0].is_star:
        return exp.StarMap(this=args[0])

    keys = []
    values = []
    for i in range(0, len(args), 2):
        keys.append(args[i])
        values.append(args[i + 1])

    return exp.VarMap(
        keys=exp.Array(expressions=keys),
        values=exp.Array(expressions=values),
    )
60class Parser(metaclass=_Parser): 61 """ 62 Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree. 63 64 Args: 65 error_level: The desired error level. 66 Default: ErrorLevel.IMMEDIATE 67 error_message_context: Determines the amount of context to capture from a 68 query string when displaying the error message (in number of characters). 69 Default: 100 70 max_errors: Maximum number of error messages to include in a raised ParseError. 71 This is only relevant if error_level is ErrorLevel.RAISE. 72 Default: 3 73 """ 74 75 FUNCTIONS: t.Dict[str, t.Callable] = { 76 **{name: f.from_arg_list for f in exp.ALL_FUNCTIONS for name in f.sql_names()}, 77 "DATE_TO_DATE_STR": lambda args: exp.Cast( 78 this=seq_get(args, 0), 79 to=exp.DataType(this=exp.DataType.Type.TEXT), 80 ), 81 "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)), 82 "LIKE": parse_like, 83 "TIME_TO_TIME_STR": lambda args: exp.Cast( 84 this=seq_get(args, 0), 85 to=exp.DataType(this=exp.DataType.Type.TEXT), 86 ), 87 "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring( 88 this=exp.Cast( 89 this=seq_get(args, 0), 90 to=exp.DataType(this=exp.DataType.Type.TEXT), 91 ), 92 start=exp.Literal.number(1), 93 length=exp.Literal.number(10), 94 ), 95 "VAR_MAP": parse_var_map, 96 } 97 98 NO_PAREN_FUNCTIONS = { 99 TokenType.CURRENT_DATE: exp.CurrentDate, 100 TokenType.CURRENT_DATETIME: exp.CurrentDate, 101 TokenType.CURRENT_TIME: exp.CurrentTime, 102 TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp, 103 TokenType.CURRENT_USER: exp.CurrentUser, 104 } 105 106 STRUCT_TYPE_TOKENS = { 107 TokenType.NESTED, 108 TokenType.STRUCT, 109 } 110 111 NESTED_TYPE_TOKENS = { 112 TokenType.ARRAY, 113 TokenType.LOWCARDINALITY, 114 TokenType.MAP, 115 TokenType.NULLABLE, 116 *STRUCT_TYPE_TOKENS, 117 } 118 119 ENUM_TYPE_TOKENS = { 120 TokenType.ENUM, 121 TokenType.ENUM8, 122 TokenType.ENUM16, 123 } 124 125 TYPE_TOKENS = { 126 TokenType.BIT, 127 TokenType.BOOLEAN, 128 
TokenType.TINYINT, 129 TokenType.UTINYINT, 130 TokenType.SMALLINT, 131 TokenType.USMALLINT, 132 TokenType.INT, 133 TokenType.UINT, 134 TokenType.BIGINT, 135 TokenType.UBIGINT, 136 TokenType.INT128, 137 TokenType.UINT128, 138 TokenType.INT256, 139 TokenType.UINT256, 140 TokenType.MEDIUMINT, 141 TokenType.UMEDIUMINT, 142 TokenType.FIXEDSTRING, 143 TokenType.FLOAT, 144 TokenType.DOUBLE, 145 TokenType.CHAR, 146 TokenType.NCHAR, 147 TokenType.VARCHAR, 148 TokenType.NVARCHAR, 149 TokenType.TEXT, 150 TokenType.MEDIUMTEXT, 151 TokenType.LONGTEXT, 152 TokenType.MEDIUMBLOB, 153 TokenType.LONGBLOB, 154 TokenType.BINARY, 155 TokenType.VARBINARY, 156 TokenType.JSON, 157 TokenType.JSONB, 158 TokenType.INTERVAL, 159 TokenType.TINYBLOB, 160 TokenType.TINYTEXT, 161 TokenType.TIME, 162 TokenType.TIMETZ, 163 TokenType.TIMESTAMP, 164 TokenType.TIMESTAMPTZ, 165 TokenType.TIMESTAMPLTZ, 166 TokenType.DATETIME, 167 TokenType.DATETIME64, 168 TokenType.DATE, 169 TokenType.INT4RANGE, 170 TokenType.INT4MULTIRANGE, 171 TokenType.INT8RANGE, 172 TokenType.INT8MULTIRANGE, 173 TokenType.NUMRANGE, 174 TokenType.NUMMULTIRANGE, 175 TokenType.TSRANGE, 176 TokenType.TSMULTIRANGE, 177 TokenType.TSTZRANGE, 178 TokenType.TSTZMULTIRANGE, 179 TokenType.DATERANGE, 180 TokenType.DATEMULTIRANGE, 181 TokenType.DECIMAL, 182 TokenType.UDECIMAL, 183 TokenType.BIGDECIMAL, 184 TokenType.UUID, 185 TokenType.GEOGRAPHY, 186 TokenType.GEOMETRY, 187 TokenType.HLLSKETCH, 188 TokenType.HSTORE, 189 TokenType.PSEUDO_TYPE, 190 TokenType.SUPER, 191 TokenType.SERIAL, 192 TokenType.SMALLSERIAL, 193 TokenType.BIGSERIAL, 194 TokenType.XML, 195 TokenType.YEAR, 196 TokenType.UNIQUEIDENTIFIER, 197 TokenType.USERDEFINED, 198 TokenType.MONEY, 199 TokenType.SMALLMONEY, 200 TokenType.ROWVERSION, 201 TokenType.IMAGE, 202 TokenType.VARIANT, 203 TokenType.OBJECT, 204 TokenType.OBJECT_IDENTIFIER, 205 TokenType.INET, 206 TokenType.IPADDRESS, 207 TokenType.IPPREFIX, 208 TokenType.UNKNOWN, 209 TokenType.NULL, 210 *ENUM_TYPE_TOKENS, 211 
*NESTED_TYPE_TOKENS, 212 } 213 214 SIGNED_TO_UNSIGNED_TYPE_TOKEN = { 215 TokenType.BIGINT: TokenType.UBIGINT, 216 TokenType.INT: TokenType.UINT, 217 TokenType.MEDIUMINT: TokenType.UMEDIUMINT, 218 TokenType.SMALLINT: TokenType.USMALLINT, 219 TokenType.TINYINT: TokenType.UTINYINT, 220 TokenType.DECIMAL: TokenType.UDECIMAL, 221 } 222 223 SUBQUERY_PREDICATES = { 224 TokenType.ANY: exp.Any, 225 TokenType.ALL: exp.All, 226 TokenType.EXISTS: exp.Exists, 227 TokenType.SOME: exp.Any, 228 } 229 230 RESERVED_KEYWORDS = { 231 *Tokenizer.SINGLE_TOKENS.values(), 232 TokenType.SELECT, 233 } 234 235 DB_CREATABLES = { 236 TokenType.DATABASE, 237 TokenType.SCHEMA, 238 TokenType.TABLE, 239 TokenType.VIEW, 240 TokenType.DICTIONARY, 241 } 242 243 CREATABLES = { 244 TokenType.COLUMN, 245 TokenType.FUNCTION, 246 TokenType.INDEX, 247 TokenType.PROCEDURE, 248 *DB_CREATABLES, 249 } 250 251 # Tokens that can represent identifiers 252 ID_VAR_TOKENS = { 253 TokenType.VAR, 254 TokenType.ANTI, 255 TokenType.APPLY, 256 TokenType.ASC, 257 TokenType.AUTO_INCREMENT, 258 TokenType.BEGIN, 259 TokenType.CACHE, 260 TokenType.CASE, 261 TokenType.COLLATE, 262 TokenType.COMMAND, 263 TokenType.COMMENT, 264 TokenType.COMMIT, 265 TokenType.CONSTRAINT, 266 TokenType.DEFAULT, 267 TokenType.DELETE, 268 TokenType.DESC, 269 TokenType.DESCRIBE, 270 TokenType.DICTIONARY, 271 TokenType.DIV, 272 TokenType.END, 273 TokenType.EXECUTE, 274 TokenType.ESCAPE, 275 TokenType.FALSE, 276 TokenType.FIRST, 277 TokenType.FILTER, 278 TokenType.FORMAT, 279 TokenType.FULL, 280 TokenType.IS, 281 TokenType.ISNULL, 282 TokenType.INTERVAL, 283 TokenType.KEEP, 284 TokenType.KILL, 285 TokenType.LEFT, 286 TokenType.LOAD, 287 TokenType.MERGE, 288 TokenType.NATURAL, 289 TokenType.NEXT, 290 TokenType.OFFSET, 291 TokenType.ORDINALITY, 292 TokenType.OVERLAPS, 293 TokenType.OVERWRITE, 294 TokenType.PARTITION, 295 TokenType.PERCENT, 296 TokenType.PIVOT, 297 TokenType.PRAGMA, 298 TokenType.RANGE, 299 TokenType.REFERENCES, 300 TokenType.RIGHT, 301 
TokenType.ROW, 302 TokenType.ROWS, 303 TokenType.SEMI, 304 TokenType.SET, 305 TokenType.SETTINGS, 306 TokenType.SHOW, 307 TokenType.TEMPORARY, 308 TokenType.TOP, 309 TokenType.TRUE, 310 TokenType.UNIQUE, 311 TokenType.UNPIVOT, 312 TokenType.UPDATE, 313 TokenType.VOLATILE, 314 TokenType.WINDOW, 315 *CREATABLES, 316 *SUBQUERY_PREDICATES, 317 *TYPE_TOKENS, 318 *NO_PAREN_FUNCTIONS, 319 } 320 321 INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END} 322 323 TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - { 324 TokenType.ANTI, 325 TokenType.APPLY, 326 TokenType.ASOF, 327 TokenType.FULL, 328 TokenType.LEFT, 329 TokenType.LOCK, 330 TokenType.NATURAL, 331 TokenType.OFFSET, 332 TokenType.RIGHT, 333 TokenType.SEMI, 334 TokenType.WINDOW, 335 } 336 337 COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS} 338 339 UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET} 340 341 TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"} 342 343 FUNC_TOKENS = { 344 TokenType.COLLATE, 345 TokenType.COMMAND, 346 TokenType.CURRENT_DATE, 347 TokenType.CURRENT_DATETIME, 348 TokenType.CURRENT_TIMESTAMP, 349 TokenType.CURRENT_TIME, 350 TokenType.CURRENT_USER, 351 TokenType.FILTER, 352 TokenType.FIRST, 353 TokenType.FORMAT, 354 TokenType.GLOB, 355 TokenType.IDENTIFIER, 356 TokenType.INDEX, 357 TokenType.ISNULL, 358 TokenType.ILIKE, 359 TokenType.INSERT, 360 TokenType.LIKE, 361 TokenType.MERGE, 362 TokenType.OFFSET, 363 TokenType.PRIMARY_KEY, 364 TokenType.RANGE, 365 TokenType.REPLACE, 366 TokenType.RLIKE, 367 TokenType.ROW, 368 TokenType.UNNEST, 369 TokenType.VAR, 370 TokenType.LEFT, 371 TokenType.RIGHT, 372 TokenType.DATE, 373 TokenType.DATETIME, 374 TokenType.TABLE, 375 TokenType.TIMESTAMP, 376 TokenType.TIMESTAMPTZ, 377 TokenType.WINDOW, 378 TokenType.XOR, 379 *TYPE_TOKENS, 380 *SUBQUERY_PREDICATES, 381 } 382 383 CONJUNCTION = { 384 TokenType.AND: exp.And, 385 TokenType.OR: exp.Or, 386 } 387 388 EQUALITY = { 389 TokenType.EQ: exp.EQ, 390 TokenType.NEQ: exp.NEQ, 391 TokenType.NULLSAFE_EQ: exp.NullSafeEQ, 
392 } 393 394 COMPARISON = { 395 TokenType.GT: exp.GT, 396 TokenType.GTE: exp.GTE, 397 TokenType.LT: exp.LT, 398 TokenType.LTE: exp.LTE, 399 } 400 401 BITWISE = { 402 TokenType.AMP: exp.BitwiseAnd, 403 TokenType.CARET: exp.BitwiseXor, 404 TokenType.PIPE: exp.BitwiseOr, 405 TokenType.DPIPE: exp.DPipe, 406 } 407 408 TERM = { 409 TokenType.DASH: exp.Sub, 410 TokenType.PLUS: exp.Add, 411 TokenType.MOD: exp.Mod, 412 TokenType.COLLATE: exp.Collate, 413 } 414 415 FACTOR = { 416 TokenType.DIV: exp.IntDiv, 417 TokenType.LR_ARROW: exp.Distance, 418 TokenType.SLASH: exp.Div, 419 TokenType.STAR: exp.Mul, 420 } 421 422 TIMES = { 423 TokenType.TIME, 424 TokenType.TIMETZ, 425 } 426 427 TIMESTAMPS = { 428 TokenType.TIMESTAMP, 429 TokenType.TIMESTAMPTZ, 430 TokenType.TIMESTAMPLTZ, 431 *TIMES, 432 } 433 434 SET_OPERATIONS = { 435 TokenType.UNION, 436 TokenType.INTERSECT, 437 TokenType.EXCEPT, 438 } 439 440 JOIN_METHODS = { 441 TokenType.NATURAL, 442 TokenType.ASOF, 443 } 444 445 JOIN_SIDES = { 446 TokenType.LEFT, 447 TokenType.RIGHT, 448 TokenType.FULL, 449 } 450 451 JOIN_KINDS = { 452 TokenType.INNER, 453 TokenType.OUTER, 454 TokenType.CROSS, 455 TokenType.SEMI, 456 TokenType.ANTI, 457 } 458 459 JOIN_HINTS: t.Set[str] = set() 460 461 LAMBDAS = { 462 TokenType.ARROW: lambda self, expressions: self.expression( 463 exp.Lambda, 464 this=self._replace_lambda( 465 self._parse_conjunction(), 466 {node.name for node in expressions}, 467 ), 468 expressions=expressions, 469 ), 470 TokenType.FARROW: lambda self, expressions: self.expression( 471 exp.Kwarg, 472 this=exp.var(expressions[0].name), 473 expression=self._parse_conjunction(), 474 ), 475 } 476 477 COLUMN_OPERATORS = { 478 TokenType.DOT: None, 479 TokenType.DCOLON: lambda self, this, to: self.expression( 480 exp.Cast if self.STRICT_CAST else exp.TryCast, 481 this=this, 482 to=to, 483 ), 484 TokenType.ARROW: lambda self, this, path: self.expression( 485 exp.JSONExtract, 486 this=this, 487 expression=path, 488 ), 489 TokenType.DARROW: 
lambda self, this, path: self.expression( 490 exp.JSONExtractScalar, 491 this=this, 492 expression=path, 493 ), 494 TokenType.HASH_ARROW: lambda self, this, path: self.expression( 495 exp.JSONBExtract, 496 this=this, 497 expression=path, 498 ), 499 TokenType.DHASH_ARROW: lambda self, this, path: self.expression( 500 exp.JSONBExtractScalar, 501 this=this, 502 expression=path, 503 ), 504 TokenType.PLACEHOLDER: lambda self, this, key: self.expression( 505 exp.JSONBContains, 506 this=this, 507 expression=key, 508 ), 509 } 510 511 EXPRESSION_PARSERS = { 512 exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 513 exp.Column: lambda self: self._parse_column(), 514 exp.Condition: lambda self: self._parse_conjunction(), 515 exp.DataType: lambda self: self._parse_types(allow_identifiers=False), 516 exp.Expression: lambda self: self._parse_statement(), 517 exp.From: lambda self: self._parse_from(), 518 exp.Group: lambda self: self._parse_group(), 519 exp.Having: lambda self: self._parse_having(), 520 exp.Identifier: lambda self: self._parse_id_var(), 521 exp.Join: lambda self: self._parse_join(), 522 exp.Lambda: lambda self: self._parse_lambda(), 523 exp.Lateral: lambda self: self._parse_lateral(), 524 exp.Limit: lambda self: self._parse_limit(), 525 exp.Offset: lambda self: self._parse_offset(), 526 exp.Order: lambda self: self._parse_order(), 527 exp.Ordered: lambda self: self._parse_ordered(), 528 exp.Properties: lambda self: self._parse_properties(), 529 exp.Qualify: lambda self: self._parse_qualify(), 530 exp.Returning: lambda self: self._parse_returning(), 531 exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY), 532 exp.Table: lambda self: self._parse_table_parts(), 533 exp.TableAlias: lambda self: self._parse_table_alias(), 534 exp.Where: lambda self: self._parse_where(), 535 exp.Window: lambda self: self._parse_named_window(), 536 exp.With: lambda self: self._parse_with(), 537 "JOIN_TYPE": lambda self: 
self._parse_join_parts(), 538 } 539 540 STATEMENT_PARSERS = { 541 TokenType.ALTER: lambda self: self._parse_alter(), 542 TokenType.BEGIN: lambda self: self._parse_transaction(), 543 TokenType.CACHE: lambda self: self._parse_cache(), 544 TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(), 545 TokenType.COMMENT: lambda self: self._parse_comment(), 546 TokenType.CREATE: lambda self: self._parse_create(), 547 TokenType.DELETE: lambda self: self._parse_delete(), 548 TokenType.DESC: lambda self: self._parse_describe(), 549 TokenType.DESCRIBE: lambda self: self._parse_describe(), 550 TokenType.DROP: lambda self: self._parse_drop(), 551 TokenType.INSERT: lambda self: self._parse_insert(), 552 TokenType.KILL: lambda self: self._parse_kill(), 553 TokenType.LOAD: lambda self: self._parse_load(), 554 TokenType.MERGE: lambda self: self._parse_merge(), 555 TokenType.PIVOT: lambda self: self._parse_simplified_pivot(), 556 TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()), 557 TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(), 558 TokenType.SET: lambda self: self._parse_set(), 559 TokenType.UNCACHE: lambda self: self._parse_uncache(), 560 TokenType.UPDATE: lambda self: self._parse_update(), 561 TokenType.USE: lambda self: self.expression( 562 exp.Use, 563 kind=self._match_texts(("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA")) 564 and exp.var(self._prev.text), 565 this=self._parse_table(schema=False), 566 ), 567 } 568 569 UNARY_PARSERS = { 570 TokenType.PLUS: lambda self: self._parse_unary(), # Unary + is handled as a no-op 571 TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()), 572 TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()), 573 TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()), 574 } 575 576 PRIMARY_PARSERS = { 577 TokenType.STRING: lambda self, token: self.expression( 578 exp.Literal, this=token.text, 
is_string=True 579 ), 580 TokenType.NUMBER: lambda self, token: self.expression( 581 exp.Literal, this=token.text, is_string=False 582 ), 583 TokenType.STAR: lambda self, _: self.expression( 584 exp.Star, **{"except": self._parse_except(), "replace": self._parse_replace()} 585 ), 586 TokenType.NULL: lambda self, _: self.expression(exp.Null), 587 TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True), 588 TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False), 589 TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text), 590 TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text), 591 TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text), 592 TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token), 593 TokenType.NATIONAL_STRING: lambda self, token: self.expression( 594 exp.National, this=token.text 595 ), 596 TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text), 597 TokenType.HEREDOC_STRING: lambda self, token: self.expression( 598 exp.RawString, this=token.text 599 ), 600 TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(), 601 } 602 603 PLACEHOLDER_PARSERS = { 604 TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder), 605 TokenType.PARAMETER: lambda self: self._parse_parameter(), 606 TokenType.COLON: lambda self: self.expression(exp.Placeholder, this=self._prev.text) 607 if self._match(TokenType.NUMBER) or self._match_set(self.ID_VAR_TOKENS) 608 else None, 609 } 610 611 RANGE_PARSERS = { 612 TokenType.BETWEEN: lambda self, this: self._parse_between(this), 613 TokenType.GLOB: binary_range_parser(exp.Glob), 614 TokenType.ILIKE: binary_range_parser(exp.ILike), 615 TokenType.IN: lambda self, this: self._parse_in(this), 616 TokenType.IRLIKE: binary_range_parser(exp.RegexpILike), 617 TokenType.IS: lambda self, this: self._parse_is(this), 
618 TokenType.LIKE: binary_range_parser(exp.Like), 619 TokenType.OVERLAPS: binary_range_parser(exp.Overlaps), 620 TokenType.RLIKE: binary_range_parser(exp.RegexpLike), 621 TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo), 622 TokenType.FOR: lambda self, this: self._parse_comprehension(this), 623 } 624 625 PROPERTY_PARSERS: t.Dict[str, t.Callable] = { 626 "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty), 627 "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty), 628 "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(), 629 "CHARACTER SET": lambda self: self._parse_character_set(), 630 "CHECKSUM": lambda self: self._parse_checksum(), 631 "CLUSTER BY": lambda self: self._parse_cluster(), 632 "CLUSTERED": lambda self: self._parse_clustered_by(), 633 "COLLATE": lambda self: self._parse_property_assignment(exp.CollateProperty), 634 "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty), 635 "COPY": lambda self: self._parse_copy_property(), 636 "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs), 637 "DEFINER": lambda self: self._parse_definer(), 638 "DETERMINISTIC": lambda self: self.expression( 639 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 640 ), 641 "DISTKEY": lambda self: self._parse_distkey(), 642 "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty), 643 "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty), 644 "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty), 645 "EXTERNAL": lambda self: self.expression(exp.ExternalProperty), 646 "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs), 647 "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 648 "FREESPACE": lambda self: self._parse_freespace(), 649 "HEAP": lambda self: self.expression(exp.HeapProperty), 650 "IMMUTABLE": lambda self: 
self.expression( 651 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 652 ), 653 "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs), 654 "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty), 655 "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"), 656 "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"), 657 "LIKE": lambda self: self._parse_create_like(), 658 "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty), 659 "LOCK": lambda self: self._parse_locking(), 660 "LOCKING": lambda self: self._parse_locking(), 661 "LOG": lambda self, **kwargs: self._parse_log(**kwargs), 662 "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty), 663 "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs), 664 "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True), 665 "NO": lambda self: self._parse_no_property(), 666 "ON": lambda self: self._parse_on_property(), 667 "ORDER BY": lambda self: self._parse_order(skip_order_token=True), 668 "PARTITION BY": lambda self: self._parse_partitioned_by(), 669 "PARTITIONED BY": lambda self: self._parse_partitioned_by(), 670 "PARTITIONED_BY": lambda self: self._parse_partitioned_by(), 671 "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True), 672 "RANGE": lambda self: self._parse_dict_range(this="RANGE"), 673 "RETURNS": lambda self: self._parse_returns(), 674 "ROW": lambda self: self._parse_row(), 675 "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty), 676 "SAMPLE": lambda self: self.expression( 677 exp.SampleProperty, this=self._match_text_seq("BY") and self._parse_bitwise() 678 ), 679 "SET": lambda self: self.expression(exp.SetProperty, multi=False), 680 "SETTINGS": lambda self: self.expression( 681 exp.SettingsProperty, expressions=self._parse_csv(self._parse_set_item) 682 ), 683 "SORTKEY": lambda self: self._parse_sortkey(), 684 "SOURCE": lambda 
self: self._parse_dict_property(this="SOURCE"), 685 "STABLE": lambda self: self.expression( 686 exp.StabilityProperty, this=exp.Literal.string("STABLE") 687 ), 688 "STORED": lambda self: self._parse_stored(), 689 "TBLPROPERTIES": lambda self: self._parse_wrapped_csv(self._parse_property), 690 "TEMP": lambda self: self.expression(exp.TemporaryProperty), 691 "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty), 692 "TO": lambda self: self._parse_to_table(), 693 "TRANSIENT": lambda self: self.expression(exp.TransientProperty), 694 "TTL": lambda self: self._parse_ttl(), 695 "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 696 "VOLATILE": lambda self: self._parse_volatile_property(), 697 "WITH": lambda self: self._parse_with_property(), 698 } 699 700 CONSTRAINT_PARSERS = { 701 "AUTOINCREMENT": lambda self: self._parse_auto_increment(), 702 "AUTO_INCREMENT": lambda self: self._parse_auto_increment(), 703 "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False), 704 "CHARACTER SET": lambda self: self.expression( 705 exp.CharacterSetColumnConstraint, this=self._parse_var_or_string() 706 ), 707 "CHECK": lambda self: self.expression( 708 exp.CheckColumnConstraint, this=self._parse_wrapped(self._parse_conjunction) 709 ), 710 "COLLATE": lambda self: self.expression( 711 exp.CollateColumnConstraint, this=self._parse_var() 712 ), 713 "COMMENT": lambda self: self.expression( 714 exp.CommentColumnConstraint, this=self._parse_string() 715 ), 716 "COMPRESS": lambda self: self._parse_compress(), 717 "CLUSTERED": lambda self: self.expression( 718 exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 719 ), 720 "NONCLUSTERED": lambda self: self.expression( 721 exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 722 ), 723 "DEFAULT": lambda self: self.expression( 724 exp.DefaultColumnConstraint, this=self._parse_bitwise() 725 ), 726 "ENCODE": lambda 
self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()), 727 "FOREIGN KEY": lambda self: self._parse_foreign_key(), 728 "FORMAT": lambda self: self.expression( 729 exp.DateFormatColumnConstraint, this=self._parse_var_or_string() 730 ), 731 "GENERATED": lambda self: self._parse_generated_as_identity(), 732 "IDENTITY": lambda self: self._parse_auto_increment(), 733 "INLINE": lambda self: self._parse_inline(), 734 "LIKE": lambda self: self._parse_create_like(), 735 "NOT": lambda self: self._parse_not_constraint(), 736 "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True), 737 "ON": lambda self: ( 738 self._match(TokenType.UPDATE) 739 and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function()) 740 ) 741 or self.expression(exp.OnProperty, this=self._parse_id_var()), 742 "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()), 743 "PRIMARY KEY": lambda self: self._parse_primary_key(), 744 "REFERENCES": lambda self: self._parse_references(match=False), 745 "TITLE": lambda self: self.expression( 746 exp.TitleColumnConstraint, this=self._parse_var_or_string() 747 ), 748 "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]), 749 "UNIQUE": lambda self: self._parse_unique(), 750 "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint), 751 "WITH": lambda self: self.expression( 752 exp.Properties, expressions=self._parse_wrapped_csv(self._parse_property) 753 ), 754 } 755 756 ALTER_PARSERS = { 757 "ADD": lambda self: self._parse_alter_table_add(), 758 "ALTER": lambda self: self._parse_alter_table_alter(), 759 "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()), 760 "DROP": lambda self: self._parse_alter_table_drop(), 761 "RENAME": lambda self: self._parse_alter_table_rename(), 762 } 763 764 SCHEMA_UNNAMED_CONSTRAINTS = {"CHECK", "FOREIGN KEY", "LIKE", "PRIMARY KEY", "UNIQUE"} 765 766 
    # Keywords parsed as functions that take no parenthesized argument list
    # (e.g. CASE ... END, IF ... THEN ...).
    NO_PAREN_FUNCTION_PARSERS = {
        "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()),
        "CASE": lambda self: self._parse_case(),
        "IF": lambda self: self._parse_if(),
        "NEXT": lambda self: self._parse_next_value_for(),
    }

    # Token types that may not be used as a function name.
    INVALID_FUNC_NAME_TOKENS = {
        TokenType.IDENTIFIER,
        TokenType.STRING,
    }

    FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"}

    # Function names whose argument lists need bespoke parsing because their
    # syntax deviates from a plain comma-separated expression list.
    FUNCTION_PARSERS = {
        "ANY_VALUE": lambda self: self._parse_any_value(),
        "CAST": lambda self: self._parse_cast(self.STRICT_CAST),
        "CONCAT": lambda self: self._parse_concat(),
        "CONCAT_WS": lambda self: self._parse_concat_ws(),
        "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST),
        "DECODE": lambda self: self._parse_decode(),
        "EXTRACT": lambda self: self._parse_extract(),
        "JSON_OBJECT": lambda self: self._parse_json_object(),
        "LOG": lambda self: self._parse_logarithm(),
        "MATCH": lambda self: self._parse_match_against(),
        "OPENJSON": lambda self: self._parse_open_json(),
        "POSITION": lambda self: self._parse_position(),
        "SAFE_CAST": lambda self: self._parse_cast(False),
        "STRING_AGG": lambda self: self._parse_string_agg(),
        "SUBSTRING": lambda self: self._parse_substring(),
        "TRIM": lambda self: self._parse_trim(),
        "TRY_CAST": lambda self: self._parse_cast(False),
        "TRY_CONVERT": lambda self: self._parse_convert(False),
    }

    # Maps a query-modifier token to a callback returning an (arg_name, node)
    # pair to attach to the query being parsed.
    QUERY_MODIFIER_PARSERS = {
        TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()),
        TokenType.WHERE: lambda self: ("where", self._parse_where()),
        TokenType.GROUP_BY: lambda self: ("group", self._parse_group()),
        TokenType.HAVING: lambda self: ("having", self._parse_having()),
        TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()),
        TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()),
        TokenType.ORDER_BY: lambda self: ("order", self._parse_order()),
        TokenType.LIMIT: lambda self: ("limit", self._parse_limit()),
        # FETCH is normalized into the same "limit" slot as LIMIT.
        TokenType.FETCH: lambda self: ("limit", self._parse_limit()),
        TokenType.OFFSET: lambda self: ("offset", self._parse_offset()),
        TokenType.FOR: lambda self: ("locks", self._parse_locks()),
        TokenType.LOCK: lambda self: ("locks", self._parse_locks()),
        TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)),
        TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)),
        TokenType.CLUSTER_BY: lambda self: (
            "cluster",
            self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
        ),
        TokenType.DISTRIBUTE_BY: lambda self: (
            "distribute",
            self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY),
        ),
        TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)),
        TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)),
        TokenType.START_WITH: lambda self: ("connect", self._parse_connect()),
    }

    # SET statement scopes/kinds mapped to their item parsers.
    SET_PARSERS = {
        "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"),
        "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"),
        "SESSION": lambda self: self._parse_set_item_assignment("SESSION"),
        "TRANSACTION": lambda self: self._parse_set_transaction(),
    }

    # Empty by default; dialects register SHOW command parsers here.
    SHOW_PARSERS: t.Dict[str, t.Callable] = {}

    TYPE_LITERAL_PARSERS = {
        exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this),
    }

    # Expression types that accept query modifiers (WHERE, ORDER BY, ...).
    MODIFIABLES = (exp.Subquery, exp.Subqueryable, exp.Table)

    DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN}

    PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE}

    TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"}
    TRANSACTION_CHARACTERISTICS = {
        "ISOLATION LEVEL REPEATABLE READ",
        "ISOLATION LEVEL READ COMMITTED",
        "ISOLATION LEVEL READ UNCOMMITTED",
        "ISOLATION LEVEL SERIALIZABLE",
        "READ WRITE",
        "READ
ONLY",
    }

    # INSERT OR <alternative> conflict-resolution keywords (SQLite-style).
    INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"}

    CLONE_KEYWORDS = {"CLONE", "COPY"}
    CLONE_KINDS = {"TIMESTAMP", "OFFSET", "STATEMENT"}

    OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS"}

    TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE}

    WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS}
    WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER}
    WINDOW_SIDES = {"FOLLOWING", "PRECEDING"}

    FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT}

    ADD_CONSTRAINT_TOKENS = {TokenType.CONSTRAINT, TokenType.PRIMARY_KEY, TokenType.FOREIGN_KEY}

    DISTINCT_TOKENS = {TokenType.DISTINCT}

    NULL_TOKENS = {TokenType.NULL}

    UNNEST_OFFSET_ALIAS_TOKENS = ID_VAR_TOKENS - SET_OPERATIONS

    STRICT_CAST = True

    # A NULL arg in CONCAT yields NULL by default
    CONCAT_NULL_OUTPUTS_STRING = False

    PREFIXED_PIVOT_COLUMNS = False
    IDENTIFY_PIVOT_STRINGS = False

    LOG_BASE_FIRST = True
    LOG_DEFAULTS_TO_LN = False

    # Whether or not ADD is present for each column added by ALTER TABLE
    ALTER_TABLE_ADD_COLUMN_KEYWORD = True

    # Whether or not the table sample clause expects CSV syntax
    TABLESAMPLE_CSV = False

    # Whether or not the SET command needs a delimiter (e.g. "=") for assignments
    SET_REQUIRES_ASSIGNMENT_DELIMITER = True

    # Whether the TRIM function expects the characters to trim as its first argument
    TRIM_PATTERN_FIRST = False

    __slots__ = (
        "error_level",
        "error_message_context",
        "max_errors",
        "sql",
        "errors",
        "_tokens",
        "_index",
        "_curr",
        "_next",
        "_prev",
        "_prev_comments",
        "_tokenizer",
    )

    # Autofilled
    TOKENIZER_CLASS: t.Type[Tokenizer] = Tokenizer
    INDEX_OFFSET: int = 0
    UNNEST_COLUMN_ONLY: bool = False
    ALIAS_POST_TABLESAMPLE: bool = False
    STRICT_STRING_CONCAT = False
    SUPPORTS_USER_DEFINED_TYPES = True
    NORMALIZE_FUNCTIONS = "upper"
    NULL_ORDERING: str = "nulls_are_small"
    SHOW_TRIE: t.Dict = {}
    SET_TRIE: t.Dict = {}
    FORMAT_MAPPING: t.Dict[str, str] = {}
    FORMAT_TRIE: t.Dict = {}
    TIME_MAPPING: t.Dict[str, str] = {}
    TIME_TRIE: t.Dict = {}

    def __init__(
        self,
        error_level: t.Optional[ErrorLevel] = None,
        error_message_context: int = 100,
        max_errors: int = 3,
    ):
        self.error_level = error_level or ErrorLevel.IMMEDIATE
        self.error_message_context = error_message_context
        self.max_errors = max_errors
        self._tokenizer = self.TOKENIZER_CLASS()
        self.reset()

    def reset(self):
        """Clears all parsing state so this instance can be reused."""
        self.sql = ""
        self.errors = []
        self._tokens = []
        self._index = 0
        self._curr = None
        self._next = None
        self._prev = None
        self._prev_comments = None

    def parse(
        self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens and returns a list of syntax trees, one tree
        per parsed SQL statement.

        Args:
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The list of the produced syntax trees.
        """
        return self._parse(
            parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
        )

    def parse_into(
        self,
        expression_types: exp.IntoType,
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens into a given Expression type. If a collection of Expression
        types is given instead, this method will try to parse the token list into each one
        of them, stopping at the first for which the parsing succeeds.

        Args:
            expression_types: The expression type(s) to try and parse the token list into.
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The target Expression.
        """
        errors = []
        for expression_type in ensure_list(expression_types):
            parser = self.EXPRESSION_PARSERS.get(expression_type)
            if not parser:
                raise TypeError(f"No parser registered for {expression_type}")

            try:
                return self._parse(parser, raw_tokens, sql)
            except ParseError as e:
                # Record which target type this attempt was for, so the final
                # merged error is actionable.
                e.errors[0]["into_expression"] = expression_type
                errors.append(e)

        raise ParseError(
            f"Failed to parse '{sql or raw_tokens}' into {expression_types}",
            errors=merge_errors(errors),
        ) from errors[-1]

    def _parse(
        self,
        parse_method: t.Callable[[Parser], t.Optional[exp.Expression]],
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        # Splits the token stream on semicolons and parses each chunk with
        # parse_method, producing one syntax tree per statement.
        self.reset()
        self.sql = sql or ""

        total = len(raw_tokens)
        chunks: t.List[t.List[Token]] = [[]]

        for i, token in enumerate(raw_tokens):
            if token.token_type == TokenType.SEMICOLON:
                # A trailing semicolon does not start a new (empty) statement.
                if i < total - 1:
                    chunks.append([])
            else:
                chunks[-1].append(token)

        expressions = []

        for tokens in chunks:
            self._index = -1
            self._tokens = tokens
            self._advance()

            expressions.append(parse_method(self))

            # Leftover tokens mean the statement was not fully consumed.
            if self._index < len(self._tokens):
                self.raise_error("Invalid expression / Unexpected token")

            self.check_errors()

        return expressions

    def check_errors(self) -> None:
        """Logs or raises any found errors, depending on the chosen error level setting."""
        if self.error_level == ErrorLevel.WARN:
            for error in self.errors:
                logger.error(str(error))
        elif self.error_level == ErrorLevel.RAISE and self.errors:
            raise ParseError(
                concat_messages(self.errors, self.max_errors),
                errors=merge_errors(self.errors),
            )

    def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
        """
        Appends an error in the list of recorded errors or raises it, depending on the chosen
        error level setting.
        """
        token = token or self._curr or self._prev or Token.string("")
        start = token.start
        end = token.end + 1
        start_context = self.sql[max(start - self.error_message_context, 0) : start]
        highlight = self.sql[start:end]
        end_context = self.sql[end : end + self.error_message_context]

        # The offending span is underlined with ANSI escape codes.
        error = ParseError.new(
            f"{message}. Line {token.line}, Col: {token.col}.\n"
            f" {start_context}\033[4m{highlight}\033[0m{end_context}",
            description=message,
            line=token.line,
            col=token.col,
            start_context=start_context,
            highlight=highlight,
            end_context=end_context,
        )

        if self.error_level == ErrorLevel.IMMEDIATE:
            raise error

        self.errors.append(error)

    def expression(
        self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs
    ) -> E:
        """
        Creates a new, validated Expression.

        Args:
            exp_class: The expression class to instantiate.
            comments: An optional list of comments to attach to the expression.
            kwargs: The arguments to set for the expression along with their respective values.
        Returns:
            The target expression.
        """
        instance = exp_class(**kwargs)
        # Attach explicit comments if given; otherwise adopt any comments
        # buffered from the previously consumed token.
        instance.add_comments(comments) if comments else self._add_comments(instance)
        return self.validate_expression(instance)

    def _add_comments(self, expression: t.Optional[exp.Expression]) -> None:
        # Moves comments buffered from the previous token onto the expression
        # and clears the buffer so they are attached only once.
        if expression and self._prev_comments:
            expression.add_comments(self._prev_comments)
            self._prev_comments = None

    def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E:
        """
        Validates an Expression, making sure that all its mandatory arguments are set.

        Args:
            expression: The expression to validate.
            args: An optional list of items that was used to instantiate the expression, if it's a Func.

        Returns:
            The validated expression.
        """
        if self.error_level != ErrorLevel.IGNORE:
            for error_message in expression.error_messages(args):
                self.raise_error(error_message)

        return expression

    def _find_sql(self, start: Token, end: Token) -> str:
        # Returns the slice of the original SQL text spanned by the two tokens.
        return self.sql[start.start : end.end + 1]

    def _advance(self, times: int = 1) -> None:
        # Moves the cursor forward, refreshing the current/next/previous token
        # views and the previous token's comments.
        self._index += times
        self._curr = seq_get(self._tokens, self._index)
        self._next = seq_get(self._tokens, self._index + 1)

        if self._index > 0:
            self._prev = self._tokens[self._index - 1]
            self._prev_comments = self._prev.comments
        else:
            self._prev = None
            self._prev_comments = None

    def _retreat(self, index: int) -> None:
        # Moves the cursor back to an earlier index (used for backtracking).
        if index != self._index:
            self._advance(index - self._index)

    def _parse_command(self) -> exp.Command:
        # Fallback: wrap the previous keyword plus the remainder of the
        # statement as an opaque Command node.
        return self.expression(exp.Command, this=self._prev.text, expression=self._parse_string())

    def _parse_comment(self, allow_exists: bool = True) -> exp.Expression:
        # Parses COMMENT [IF EXISTS] ON <kind> <object> IS <string>.
        start = self._prev
        exists = self._parse_exists() if allow_exists else None

        self._match(TokenType.ON)

        kind = self._match_set(self.CREATABLES) and self._prev
        if not kind:
            # Unknown target kind: fall back to an opaque command.
            return self._parse_as_command(start)

        if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=kind.token_type)
        elif kind.token_type == TokenType.TABLE:
            this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS)
        elif kind.token_type == TokenType.COLUMN:
            this = self._parse_column()
        else:
            this = self._parse_id_var()

        self._match(TokenType.IS)

        return self.expression(
            exp.Comment, this=this, kind=kind.text, expression=self._parse_string(), exists=exists
        )

    def _parse_to_table(
        self,
    ) -> exp.ToTableProperty:
        table = self._parse_table_parts(schema=True)
        return self.expression(exp.ToTableProperty, this=table)

    # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl
    def _parse_ttl(self) -> exp.Expression:
        # Parses a ClickHouse TTL clause: a CSV of TTL expressions, each with an
        # optional action, then optional WHERE / GROUP BY [SET ...] parts.
        def _parse_ttl_action() -> t.Optional[exp.Expression]:
            this = self._parse_bitwise()

            if self._match_text_seq("DELETE"):
                return self.expression(exp.MergeTreeTTLAction, this=this, delete=True)
            if self._match_text_seq("RECOMPRESS"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise()
                )
            if self._match_text_seq("TO", "DISK"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string()
                )
            if self._match_text_seq("TO", "VOLUME"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string()
                )

            return this

        expressions = self._parse_csv(_parse_ttl_action)
        where = self._parse_where()
        group = self._parse_group()

        aggregates = None
        if group and self._match(TokenType.SET):
            aggregates = self._parse_csv(self._parse_set_item)

        return self.expression(
            exp.MergeTreeTTL,
            expressions=expressions,
            where=where,
            group=group,
            aggregates=aggregates,
        )

    def _parse_statement(self) -> t.Optional[exp.Expression]:
        # Entry point for a single statement: dispatch on the leading token.
        if self._curr is None:
            return None

        if self._match_set(self.STATEMENT_PARSERS):
            return self.STATEMENT_PARSERS[self._prev.token_type](self)

        if self._match_set(Tokenizer.COMMANDS):
            return self._parse_command()

        # Otherwise parse a bare expression or a SELECT, applying any set
        # operations and query modifiers that follow.
        expression = self._parse_expression()
        expression = self._parse_set_operations(expression) if expression else self._parse_select()
        return self._parse_query_modifiers(expression)

    def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command:
        start = self._prev
        temporary = self._match(TokenType.TEMPORARY)
        materialized = self._match_text_seq("MATERIALIZED")

        kind = self._match_set(self.CREATABLES) and self._prev.text
        if not kind:
            # Unknown DROP target: fall back to an opaque command.
            return self._parse_as_command(start)

        return self.expression(
            exp.Drop,
            comments=start.comments,
            exists=exists or self._parse_exists(),
            this=self._parse_table(schema=True),
            kind=kind,
            temporary=temporary,
            materialized=materialized,
            cascade=self._match_text_seq("CASCADE"),
            constraints=self._match_text_seq("CONSTRAINTS"),
            purge=self._match_text_seq("PURGE"),
        )

    def _parse_exists(self, not_: bool = False) -> t.Optional[bool]:
        # Matches IF [NOT] EXISTS; truthy only when the full sequence matched.
        return (
            self._match_text_seq("IF")
            and (not not_ or self._match(TokenType.NOT))
            and self._match(TokenType.EXISTS)
        )

    def _parse_create(self) -> exp.Create | exp.Command:
        # Note: this can't be None because we've matched a statement parser
        start = self._prev
        comments = self._prev_comments

        replace = start.text.upper() == "REPLACE" or self._match_pair(
            TokenType.OR, TokenType.REPLACE
        )
        unique = self._match(TokenType.UNIQUE)

        if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False):
            # CREATE TABLE FUNCTION: skip TABLE so FUNCTION becomes the creatable.
            self._advance()

        properties = None
        create_token = self._match_set(self.CREATABLES) and self._prev

        if not create_token:
            # exp.Properties.Location.POST_CREATE
            properties = self._parse_properties()
            create_token = self._match_set(self.CREATABLES) and self._prev

        if not properties or not create_token:
            return self._parse_as_command(start)

        exists = self._parse_exists(not_=True)
        this = None
        expression: t.Optional[exp.Expression] = None
        indexes = None
        no_schema_binding = None
        begin = None
        end = None
        clone = None

        def extend_props(temp_props: t.Optional[exp.Properties]) -> None:
            # Accumulates properties parsed at different clause positions into
            # the single `properties` node.
            nonlocal properties
            if properties and temp_props:
                properties.expressions.extend(temp_props.expressions)
            elif temp_props:
                properties = temp_props

        if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=create_token.token_type)

            # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature)
            extend_props(self._parse_properties())

            self._match(TokenType.ALIAS)

            if self._match(TokenType.COMMAND):
                expression = self._parse_as_command(self._prev)
            else:
                begin = self._match(TokenType.BEGIN)
                return_ = self._match_text_seq("RETURN")

                if self._match(TokenType.STRING, advance=False):
                    # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property
                    # # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement
                    expression = self._parse_string()
                    extend_props(self._parse_properties())
                else:
                    expression = self._parse_statement()

                end = self._match_text_seq("END")

                if return_:
                    expression = self.expression(exp.Return, this=expression)
        elif create_token.token_type ==
TokenType.INDEX:
            this = self._parse_index(index=self._parse_id_var())
        elif create_token.token_type in self.DB_CREATABLES:
            table_parts = self._parse_table_parts(schema=True)

            # exp.Properties.Location.POST_NAME
            self._match(TokenType.COMMA)
            extend_props(self._parse_properties(before=True))

            this = self._parse_schema(this=table_parts)

            # exp.Properties.Location.POST_SCHEMA and POST_WITH
            extend_props(self._parse_properties())

            self._match(TokenType.ALIAS)
            if not self._match_set(self.DDL_SELECT_TOKENS, advance=False):
                # exp.Properties.Location.POST_ALIAS
                extend_props(self._parse_properties())

            expression = self._parse_ddl_select()

            if create_token.token_type == TokenType.TABLE:
                # exp.Properties.Location.POST_EXPRESSION
                extend_props(self._parse_properties())

                indexes = []
                while True:
                    index = self._parse_index()

                    # exp.Properties.Location.POST_INDEX
                    extend_props(self._parse_properties())

                    if not index:
                        break
                    else:
                        self._match(TokenType.COMMA)
                        indexes.append(index)
            elif create_token.token_type == TokenType.VIEW:
                if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"):
                    no_schema_binding = True

        shallow = self._match_text_seq("SHALLOW")

        if self._match_texts(self.CLONE_KEYWORDS):
            # CLONE/COPY clause, optionally qualified with AT/BEFORE time travel
            # (e.g. ... AT (TIMESTAMP => <expr>)).
            copy = self._prev.text.lower() == "copy"
            clone = self._parse_table(schema=True)
            when = self._match_texts({"AT", "BEFORE"}) and self._prev.text.upper()
            clone_kind = (
                self._match(TokenType.L_PAREN)
                and self._match_texts(self.CLONE_KINDS)
                and self._prev.text.upper()
            )
            clone_expression = self._match(TokenType.FARROW) and self._parse_bitwise()
            self._match(TokenType.R_PAREN)
            clone = self.expression(
                exp.Clone,
                this=clone,
                when=when,
                kind=clone_kind,
                shallow=shallow,
                expression=clone_expression,
                copy=copy,
            )

        return self.expression(
            exp.Create,
            comments=comments,
            this=this,
            kind=create_token.text,
            replace=replace,
            unique=unique,
            expression=expression,
            exists=exists,
            properties=properties,
            indexes=indexes,
            no_schema_binding=no_schema_binding,
            begin=begin,
            end=end,
            clone=clone,
        )

    def _parse_property_before(self) -> t.Optional[exp.Expression]:
        # only used for teradata currently
        self._match(TokenType.COMMA)

        kwargs = {
            "no": self._match_text_seq("NO"),
            "dual": self._match_text_seq("DUAL"),
            "before": self._match_text_seq("BEFORE"),
            "default": self._match_text_seq("DEFAULT"),
            "local": (self._match_text_seq("LOCAL") and "LOCAL")
            or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"),
            "after": self._match_text_seq("AFTER"),
            "minimum": self._match_texts(("MIN", "MINIMUM")),
            "maximum": self._match_texts(("MAX", "MAXIMUM")),
        }

        if self._match_texts(self.PROPERTY_PARSERS):
            parser = self.PROPERTY_PARSERS[self._prev.text.upper()]
            try:
                # Forward only the modifiers that actually matched; a TypeError
                # means the parser does not accept one of them.
                return parser(self, **{k: v for k, v in kwargs.items() if v})
            except TypeError:
                self.raise_error(f"Cannot parse property '{self._prev.text}'")

        return None

    def _parse_property(self) -> t.Optional[exp.Expression]:
        if self._match_texts(self.PROPERTY_PARSERS):
            return self.PROPERTY_PARSERS[self._prev.text.upper()](self)

        if self._match_pair(TokenType.DEFAULT, TokenType.CHARACTER_SET):
            return self._parse_character_set(default=True)

        if self._match_text_seq("COMPOUND", "SORTKEY"):
            return self._parse_sortkey(compound=True)

        if self._match_text_seq("SQL", "SECURITY"):
            return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER"))

        # Fallback: a generic `key = value` property; backtrack if there is no EQ.
        index = self._index
        key = self._parse_column()

        if not self._match(TokenType.EQ):
            self._retreat(index)
            return None

        return self.expression(
            exp.Property,
            this=key.to_dot() if isinstance(key, exp.Column) else key,
            value=self._parse_column() or self._parse_var(any_token=True),
        )

    def _parse_stored(self) -> exp.FileFormatProperty:
        # Parses STORED [AS] <format>, including the Hive-style
        # INPUTFORMAT '<class>' OUTPUTFORMAT '<class>' variant.
        self._match(TokenType.ALIAS)

        input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None
        output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None

        return self.expression(
            exp.FileFormatProperty,
            this=self.expression(
                exp.InputOutputFormat, input_format=input_format, output_format=output_format
            )
            if input_format or output_format
            else self._parse_var_or_string() or self._parse_number() or self._parse_id_var(),
        )

    def _parse_property_assignment(self, exp_class: t.Type[E]) -> E:
        # Parses an optional "=" / AS followed by a field into exp_class.
        self._match(TokenType.EQ)
        self._match(TokenType.ALIAS)
        return self.expression(exp_class, this=self._parse_field())

    def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]:
        # Collects consecutive properties until one fails to parse; `before`
        # selects the pre-name (Teradata-style) property grammar.
        properties = []
        while True:
            if before:
                prop = self._parse_property_before()
            else:
                prop = self._parse_property()

            if not prop:
                break
            for p in ensure_list(prop):
                properties.append(p)

        if properties:
            return self.expression(exp.Properties, expressions=properties)

        return None

    def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty:
        return self.expression(
            exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION")
        )

    def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty:
        # VOLATILE is a table property only when the token two places back is
        # CREATE/REPLACE/UNIQUE; otherwise it marks function stability.
        if self._index >= 2:
            pre_volatile_token = self._tokens[self._index - 2]
        else:
            pre_volatile_token = None

        if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS:
            return exp.VolatileProperty()

        return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE"))

    def _parse_with_property(
        self,
    ) -> t.Optional[exp.Expression] | t.List[exp.Expression]:
        # Dispatches the various WITH ... property forms.
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_property)

        if self._match_text_seq("JOURNAL"):
            return self._parse_withjournaltable()

        if self._match_text_seq("DATA"):
            return self._parse_withdata(no=False)
        elif self._match_text_seq("NO", "DATA"):
            return self._parse_withdata(no=True)

        if not self._next:
            return None

        return self._parse_withisolatedloading()

    # https://dev.mysql.com/doc/refman/8.0/en/create-view.html
    def _parse_definer(self) -> t.Optional[exp.DefinerProperty]:
        # Parses DEFINER = user@host.
        self._match(TokenType.EQ)

        user = self._parse_id_var()
        self._match(TokenType.PARAMETER)
        host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text)

        if not user or not host:
            return None

        return exp.DefinerProperty(this=f"{user}@{host}")

    def _parse_withjournaltable(self) -> exp.WithJournalTableProperty:
        self._match(TokenType.TABLE)
        self._match(TokenType.EQ)
        return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts())

    def _parse_log(self, no: bool = False) -> exp.LogProperty:
        return self.expression(exp.LogProperty, no=no)

    def _parse_journal(self, **kwargs) -> exp.JournalProperty:
        return self.expression(exp.JournalProperty, **kwargs)

    def _parse_checksum(self) -> exp.ChecksumProperty:
        # Parses CHECKSUM = ON | OFF [DEFAULT]; `on` stays None if neither matched.
        self._match(TokenType.EQ)

        on = None
        if self._match(TokenType.ON):
            on = True
        elif self._match_text_seq("OFF"):
            on = False

        return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT))

    def _parse_cluster(self) -> exp.Cluster:
        return
self.expression(exp.Cluster, expressions=self._parse_csv(self._parse_ordered))

    def _parse_clustered_by(self) -> exp.ClusteredByProperty:
        # Hive-style: CLUSTERED BY (cols) [SORTED BY (cols)] INTO <n> BUCKETS.
        self._match_text_seq("BY")

        self._match_l_paren()
        expressions = self._parse_csv(self._parse_column)
        self._match_r_paren()

        if self._match_text_seq("SORTED", "BY"):
            self._match_l_paren()
            sorted_by = self._parse_csv(self._parse_ordered)
            self._match_r_paren()
        else:
            sorted_by = None

        self._match(TokenType.INTO)
        buckets = self._parse_number()
        self._match_text_seq("BUCKETS")

        return self.expression(
            exp.ClusteredByProperty,
            expressions=expressions,
            sorted_by=sorted_by,
            buckets=buckets,
        )

    def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]:
        if not self._match_text_seq("GRANTS"):
            # No GRANTS keyword follows: backtrack one token and give up.
            self._retreat(self._index - 1)
            return None

        return self.expression(exp.CopyGrantsProperty)

    def _parse_freespace(self) -> exp.FreespaceProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT)
        )

    def _parse_mergeblockratio(
        self, no: bool = False, default: bool = False
    ) -> exp.MergeBlockRatioProperty:
        if self._match(TokenType.EQ):
            return self.expression(
                exp.MergeBlockRatioProperty,
                this=self._parse_number(),
                percent=self._match(TokenType.PERCENT),
            )

        return self.expression(exp.MergeBlockRatioProperty, no=no, default=default)

    def _parse_datablocksize(
        self,
        default: t.Optional[bool] = None,
        minimum: t.Optional[bool] = None,
        maximum: t.Optional[bool] = None,
    ) -> exp.DataBlocksizeProperty:
        self._match(TokenType.EQ)
        size = self._parse_number()

        units = None
        if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")):
            units = self._prev.text

        return self.expression(
            exp.DataBlocksizeProperty,
            size=size,
            units=units,
            default=default,
            minimum=minimum,
            maximum=maximum,
        )

    def _parse_blockcompression(self) -> exp.BlockCompressionProperty:
        self._match(TokenType.EQ)
        always = self._match_text_seq("ALWAYS")
        manual = self._match_text_seq("MANUAL")
        never = self._match_text_seq("NEVER")
        default = self._match_text_seq("DEFAULT")

        autotemp = None
        if self._match_text_seq("AUTOTEMP"):
            autotemp = self._parse_schema()

        return self.expression(
            exp.BlockCompressionProperty,
            always=always,
            manual=manual,
            never=never,
            default=default,
            autotemp=autotemp,
        )

    def _parse_withisolatedloading(self) -> exp.IsolatedLoadingProperty:
        # WITH [NO] [CONCURRENT] ISOLATED LOADING [FOR ALL | FOR INSERT | FOR NONE]
        # (Teradata-style).
        no = self._match_text_seq("NO")
        concurrent = self._match_text_seq("CONCURRENT")
        self._match_text_seq("ISOLATED", "LOADING")
        for_all = self._match_text_seq("FOR", "ALL")
        for_insert = self._match_text_seq("FOR", "INSERT")
        for_none = self._match_text_seq("FOR", "NONE")
        return self.expression(
            exp.IsolatedLoadingProperty,
            no=no,
            concurrent=concurrent,
            for_all=for_all,
            for_insert=for_insert,
            for_none=for_none,
        )

    def _parse_locking(self) -> exp.LockingProperty:
        # Parses a LOCKING clause: target kind, optional target object,
        # FOR/IN, lock type and optional OVERRIDE.
        if self._match(TokenType.TABLE):
            kind = "TABLE"
        elif self._match(TokenType.VIEW):
            kind = "VIEW"
        elif self._match(TokenType.ROW):
            kind = "ROW"
        elif self._match_text_seq("DATABASE"):
            kind = "DATABASE"
        else:
            kind = None

        if kind in ("DATABASE", "TABLE", "VIEW"):
            this = self._parse_table_parts()
        else:
            this = None

        if self._match(TokenType.FOR):
            for_or_in = "FOR"
        elif self._match(TokenType.IN):
            for_or_in = "IN"
        else:
            for_or_in = None

        if self._match_text_seq("ACCESS"):
            lock_type = "ACCESS"
        elif self._match_texts(("EXCL",
"EXCLUSIVE")):
            lock_type = "EXCLUSIVE"
        elif self._match_text_seq("SHARE"):
            lock_type = "SHARE"
        elif self._match_text_seq("READ"):
            lock_type = "READ"
        elif self._match_text_seq("WRITE"):
            lock_type = "WRITE"
        elif self._match_text_seq("CHECKSUM"):
            lock_type = "CHECKSUM"
        else:
            lock_type = None

        override = self._match_text_seq("OVERRIDE")

        return self.expression(
            exp.LockingProperty,
            this=this,
            kind=kind,
            for_or_in=for_or_in,
            lock_type=lock_type,
            override=override,
        )

    def _parse_partition_by(self) -> t.List[exp.Expression]:
        # Returns the PARTITION BY expressions, or [] when the clause is absent.
        if self._match(TokenType.PARTITION_BY):
            return self._parse_csv(self._parse_conjunction)
        return []

    def _parse_partitioned_by(self) -> exp.PartitionedByProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.PartitionedByProperty,
            this=self._parse_schema() or self._parse_bracket(self._parse_field()),
        )

    def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty:
        # WITH [NO] DATA [AND [NO] STATISTICS]; statistics stays None if absent.
        if self._match_text_seq("AND", "STATISTICS"):
            statistics = True
        elif self._match_text_seq("AND", "NO", "STATISTICS"):
            statistics = False
        else:
            statistics = None

        return self.expression(exp.WithDataProperty, no=no, statistics=statistics)

    def _parse_no_property(self) -> t.Optional[exp.NoPrimaryIndexProperty]:
        if self._match_text_seq("PRIMARY", "INDEX"):
            return exp.NoPrimaryIndexProperty()
        return None

    def _parse_on_property(self) -> t.Optional[exp.Expression]:
        if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"):
            return exp.OnCommitProperty()
        if self._match_text_seq("COMMIT", "DELETE", "ROWS"):
            return exp.OnCommitProperty(delete=True)
        return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var()))

    def _parse_distkey(self) -> exp.DistKeyProperty:
        return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var))

    def _parse_create_like(self) -> t.Optional[exp.LikeProperty]:
        # LIKE <table> [INCLUDING | EXCLUDING <option>]...
        table = self._parse_table(schema=True)

        options = []
        while self._match_texts(("INCLUDING", "EXCLUDING")):
            this = self._prev.text.upper()

            id_var = self._parse_id_var()
            if not id_var:
                return None

            options.append(
                self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper()))
            )

        return self.expression(exp.LikeProperty, this=table, expressions=options)

    def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty:
        return self.expression(
            exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound
        )

    def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default
        )

    def _parse_returns(self) -> exp.ReturnsProperty:
        # Parses RETURNS <type> or RETURNS TABLE [<schema>], where the TABLE
        # column list may be angle-bracketed: TABLE<...>.
        value: t.Optional[exp.Expression]
        is_table = self._match(TokenType.TABLE)

        if is_table:
            if self._match(TokenType.LT):
                value = self.expression(
                    exp.Schema,
                    this="TABLE",
                    expressions=self._parse_csv(self._parse_struct_types),
                )
                if not self._match(TokenType.GT):
                    self.raise_error("Expecting >")
            else:
                value = self._parse_schema(exp.var("TABLE"))
        else:
            value = self._parse_types()

        return self.expression(exp.ReturnsProperty, this=value, is_table=is_table)

    def _parse_describe(self) -> exp.Describe:
        kind = self._match_set(self.CREATABLES) and self._prev.text
        this = self._parse_table(schema=True)
        properties = self._parse_properties()
        expressions = properties.expressions if properties else None
        return self.expression(exp.Describe, this=this, kind=kind, expressions=expressions)
    def _parse_insert(self) -> exp.Insert:
        """Parse an INSERT statement (or INSERT ... DIRECTORY for Hive-style exports)."""
        comments = ensure_list(self._prev_comments)
        overwrite = self._match(TokenType.OVERWRITE)
        ignore = self._match(TokenType.IGNORE)
        local = self._match_text_seq("LOCAL")
        alternative = None

        if self._match_text_seq("DIRECTORY"):
            # Hive: INSERT OVERWRITE [LOCAL] DIRECTORY '<path>' [ROW FORMAT ...]
            this: t.Optional[exp.Expression] = self.expression(
                exp.Directory,
                this=self._parse_var_or_string(),
                local=local,
                row_format=self._parse_row_format(match_row=True),
            )
        else:
            if self._match(TokenType.OR):
                # e.g. sqlite INSERT OR REPLACE / OR IGNORE ...
                alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text

            self._match(TokenType.INTO)
            comments += ensure_list(self._prev_comments)
            self._match(TokenType.TABLE)
            this = self._parse_table(schema=True)

        returning = self._parse_returning()

        return self.expression(
            exp.Insert,
            comments=comments,
            this=this,
            by_name=self._match_text_seq("BY", "NAME"),
            exists=self._parse_exists(),
            partition=self._parse_partition(),
            where=self._match_pair(TokenType.REPLACE, TokenType.WHERE)
            and self._parse_conjunction(),
            expression=self._parse_ddl_select(),
            conflict=self._parse_on_conflict(),
            # RETURNING may legally appear before or after the source expression.
            returning=returning or self._parse_returning(),
            overwrite=overwrite,
            alternative=alternative,
            ignore=ignore,
        )

    def _parse_kill(self) -> exp.Kill:
        """Parse a KILL [CONNECTION | QUERY] <id> statement."""
        kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None

        return self.expression(
            exp.Kill,
            this=self._parse_primary(),
            kind=kind,
        )

    def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]:
        """Parse ON CONFLICT (Postgres) / ON DUPLICATE KEY (MySQL) conflict clauses."""
        conflict = self._match_text_seq("ON", "CONFLICT")
        duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY")

        if not conflict and not duplicate:
            return None

        nothing = None
        expressions = None
        key = None
        constraint = None

        if conflict:
            # Conflict target: either a named constraint or a list of key values.
            if self._match_text_seq("ON", "CONSTRAINT"):
                constraint = self._parse_id_var()
            else:
                key = self._parse_csv(self._parse_value)

        self._match_text_seq("DO")
        if self._match_text_seq("NOTHING"):
            nothing = True
        else:
            self._match(TokenType.UPDATE)
            self._match(TokenType.SET)
            expressions = self._parse_csv(self._parse_equality)

        return self.expression(
            exp.OnConflict,
            duplicate=duplicate,
            expressions=expressions,
            nothing=nothing,
            key=key,
            constraint=constraint,
        )

    def _parse_returning(self) -> t.Optional[exp.Returning]:
        """Parse a RETURNING <exprs> [INTO <target>] clause, or None when absent."""
        if not self._match(TokenType.RETURNING):
            return None
        return self.expression(
            exp.Returning,
            expressions=self._parse_csv(self._parse_expression),
            into=self._match(TokenType.INTO) and self._parse_table_part(),
        )

    def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]:
        """Parse the remainder of ROW FORMAT ... (ROW was already consumed by the caller)."""
        if not self._match(TokenType.FORMAT):
            return None
        return self._parse_row_format()

    def _parse_row_format(
        self, match_row: bool = False
    ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]:
        """Parse a Hive ROW FORMAT SERDE/DELIMITED spec.

        When `match_row` is True, the leading ROW FORMAT tokens must be present,
        otherwise None is returned.
        """
        if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT):
            return None

        if self._match_text_seq("SERDE"):
            this = self._parse_string()

            serde_properties = None
            if self._match(TokenType.SERDE_PROPERTIES):
                serde_properties = self.expression(
                    exp.SerdeProperties, expressions=self._parse_wrapped_csv(self._parse_property)
                )

            return self.expression(
                exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties
            )

        self._match_text_seq("DELIMITED")

        kwargs = {}

        # Each sub-clause is optional and order-dependent per Hive's grammar.
        if self._match_text_seq("FIELDS", "TERMINATED", "BY"):
            kwargs["fields"] = self._parse_string()
            if self._match_text_seq("ESCAPED", "BY"):
                kwargs["escaped"] = self._parse_string()
        if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"):
            kwargs["collection_items"] = self._parse_string()
        if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"):
            kwargs["map_keys"] = self._parse_string()
        if self._match_text_seq("LINES", "TERMINATED", "BY"):
            kwargs["lines"] = self._parse_string()
        if self._match_text_seq("NULL", "DEFINED", "AS"):
            kwargs["null"] = self._parse_string()

        return self.expression(exp.RowFormatDelimitedProperty, **kwargs)  # type: ignore

    def _parse_load(self) -> exp.LoadData | exp.Command:
        """Parse LOAD DATA [LOCAL] INPATH ... (Hive); other LOAD forms fall back to a Command."""
        if self._match_text_seq("DATA"):
            local = self._match_text_seq("LOCAL")
            self._match_text_seq("INPATH")
            inpath = self._parse_string()
            overwrite = self._match(TokenType.OVERWRITE)
            self._match_pair(TokenType.INTO, TokenType.TABLE)

            return self.expression(
                exp.LoadData,
                this=self._parse_table(schema=True),
                local=local,
                overwrite=overwrite,
                inpath=inpath,
                partition=self._parse_partition(),
                input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(),
                serde=self._match_text_seq("SERDE") and self._parse_string(),
            )
        return self._parse_as_command(self._prev)

    def _parse_delete(self) -> exp.Delete:
        """Parse a DELETE statement, including MySQL multi-table deletes."""
        # This handles MySQL's "Multiple-Table Syntax"
        # https://dev.mysql.com/doc/refman/8.0/en/delete.html
        tables = None
        comments = self._prev_comments
        if not self._match(TokenType.FROM, advance=False):
            tables = self._parse_csv(self._parse_table) or None

        returning = self._parse_returning()

        return self.expression(
            exp.Delete,
            comments=comments,
            tables=tables,
            this=self._match(TokenType.FROM) and self._parse_table(joins=True),
            using=self._match(TokenType.USING) and self._parse_table(joins=True),
            where=self._parse_where(),
            # RETURNING may appear before or after WHERE depending on dialect.
            returning=returning or self._parse_returning(),
            limit=self._parse_limit(),
        )
def _parse_update(self) -> exp.Update: 1998 comments = self._prev_comments 1999 this = self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS) 2000 expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality) 2001 returning = self._parse_returning() 2002 return self.expression( 2003 exp.Update, 2004 comments=comments, 2005 **{ # type: ignore 2006 "this": this, 2007 "expressions": expressions, 2008 "from": self._parse_from(joins=True), 2009 "where": self._parse_where(), 2010 "returning": returning or self._parse_returning(), 2011 "order": self._parse_order(), 2012 "limit": self._parse_limit(), 2013 }, 2014 ) 2015 2016 def _parse_uncache(self) -> exp.Uncache: 2017 if not self._match(TokenType.TABLE): 2018 self.raise_error("Expecting TABLE after UNCACHE") 2019 2020 return self.expression( 2021 exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True) 2022 ) 2023 2024 def _parse_cache(self) -> exp.Cache: 2025 lazy = self._match_text_seq("LAZY") 2026 self._match(TokenType.TABLE) 2027 table = self._parse_table(schema=True) 2028 2029 options = [] 2030 if self._match_text_seq("OPTIONS"): 2031 self._match_l_paren() 2032 k = self._parse_string() 2033 self._match(TokenType.EQ) 2034 v = self._parse_string() 2035 options = [k, v] 2036 self._match_r_paren() 2037 2038 self._match(TokenType.ALIAS) 2039 return self.expression( 2040 exp.Cache, 2041 this=table, 2042 lazy=lazy, 2043 options=options, 2044 expression=self._parse_select(nested=True), 2045 ) 2046 2047 def _parse_partition(self) -> t.Optional[exp.Partition]: 2048 if not self._match(TokenType.PARTITION): 2049 return None 2050 2051 return self.expression( 2052 exp.Partition, expressions=self._parse_wrapped_csv(self._parse_conjunction) 2053 ) 2054 2055 def _parse_value(self) -> exp.Tuple: 2056 if self._match(TokenType.L_PAREN): 2057 expressions = self._parse_csv(self._parse_conjunction) 2058 self._match_r_paren() 2059 return self.expression(exp.Tuple, 
expressions=expressions) 2060 2061 # In presto we can have VALUES 1, 2 which results in 1 column & 2 rows. 2062 # https://prestodb.io/docs/current/sql/values.html 2063 return self.expression(exp.Tuple, expressions=[self._parse_conjunction()]) 2064 2065 def _parse_projections(self) -> t.List[exp.Expression]: 2066 return self._parse_expressions() 2067 2068 def _parse_select( 2069 self, nested: bool = False, table: bool = False, parse_subquery_alias: bool = True 2070 ) -> t.Optional[exp.Expression]: 2071 cte = self._parse_with() 2072 2073 if cte: 2074 this = self._parse_statement() 2075 2076 if not this: 2077 self.raise_error("Failed to parse any statement following CTE") 2078 return cte 2079 2080 if "with" in this.arg_types: 2081 this.set("with", cte) 2082 else: 2083 self.raise_error(f"{this.key} does not support CTE") 2084 this = cte 2085 2086 return this 2087 2088 # duckdb supports leading with FROM x 2089 from_ = self._parse_from() if self._match(TokenType.FROM, advance=False) else None 2090 2091 if self._match(TokenType.SELECT): 2092 comments = self._prev_comments 2093 2094 hint = self._parse_hint() 2095 all_ = self._match(TokenType.ALL) 2096 distinct = self._match_set(self.DISTINCT_TOKENS) 2097 2098 kind = ( 2099 self._match(TokenType.ALIAS) 2100 and self._match_texts(("STRUCT", "VALUE")) 2101 and self._prev.text 2102 ) 2103 2104 if distinct: 2105 distinct = self.expression( 2106 exp.Distinct, 2107 on=self._parse_value() if self._match(TokenType.ON) else None, 2108 ) 2109 2110 if all_ and distinct: 2111 self.raise_error("Cannot specify both ALL and DISTINCT after SELECT") 2112 2113 limit = self._parse_limit(top=True) 2114 projections = self._parse_projections() 2115 2116 this = self.expression( 2117 exp.Select, 2118 kind=kind, 2119 hint=hint, 2120 distinct=distinct, 2121 expressions=projections, 2122 limit=limit, 2123 ) 2124 this.comments = comments 2125 2126 into = self._parse_into() 2127 if into: 2128 this.set("into", into) 2129 2130 if not from_: 2131 from_ = 
self._parse_from() 2132 2133 if from_: 2134 this.set("from", from_) 2135 2136 this = self._parse_query_modifiers(this) 2137 elif (table or nested) and self._match(TokenType.L_PAREN): 2138 if self._match(TokenType.PIVOT): 2139 this = self._parse_simplified_pivot() 2140 elif self._match(TokenType.FROM): 2141 this = exp.select("*").from_( 2142 t.cast(exp.From, self._parse_from(skip_from_token=True)) 2143 ) 2144 else: 2145 this = self._parse_table() if table else self._parse_select(nested=True) 2146 this = self._parse_set_operations(self._parse_query_modifiers(this)) 2147 2148 self._match_r_paren() 2149 2150 # We return early here so that the UNION isn't attached to the subquery by the 2151 # following call to _parse_set_operations, but instead becomes the parent node 2152 return self._parse_subquery(this, parse_alias=parse_subquery_alias) 2153 elif self._match(TokenType.VALUES): 2154 this = self.expression( 2155 exp.Values, 2156 expressions=self._parse_csv(self._parse_value), 2157 alias=self._parse_table_alias(), 2158 ) 2159 elif from_: 2160 this = exp.select("*").from_(from_.this, copy=False) 2161 else: 2162 this = None 2163 2164 return self._parse_set_operations(this) 2165 2166 def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]: 2167 if not skip_with_token and not self._match(TokenType.WITH): 2168 return None 2169 2170 comments = self._prev_comments 2171 recursive = self._match(TokenType.RECURSIVE) 2172 2173 expressions = [] 2174 while True: 2175 expressions.append(self._parse_cte()) 2176 2177 if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH): 2178 break 2179 else: 2180 self._match(TokenType.WITH) 2181 2182 return self.expression( 2183 exp.With, comments=comments, expressions=expressions, recursive=recursive 2184 ) 2185 2186 def _parse_cte(self) -> exp.CTE: 2187 alias = self._parse_table_alias() 2188 if not alias or not alias.this: 2189 self.raise_error("Expected CTE to have alias") 2190 2191 
self._match(TokenType.ALIAS) 2192 return self.expression( 2193 exp.CTE, this=self._parse_wrapped(self._parse_statement), alias=alias 2194 ) 2195 2196 def _parse_table_alias( 2197 self, alias_tokens: t.Optional[t.Collection[TokenType]] = None 2198 ) -> t.Optional[exp.TableAlias]: 2199 any_token = self._match(TokenType.ALIAS) 2200 alias = ( 2201 self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 2202 or self._parse_string_as_identifier() 2203 ) 2204 2205 index = self._index 2206 if self._match(TokenType.L_PAREN): 2207 columns = self._parse_csv(self._parse_function_parameter) 2208 self._match_r_paren() if columns else self._retreat(index) 2209 else: 2210 columns = None 2211 2212 if not alias and not columns: 2213 return None 2214 2215 return self.expression(exp.TableAlias, this=alias, columns=columns) 2216 2217 def _parse_subquery( 2218 self, this: t.Optional[exp.Expression], parse_alias: bool = True 2219 ) -> t.Optional[exp.Subquery]: 2220 if not this: 2221 return None 2222 2223 return self.expression( 2224 exp.Subquery, 2225 this=this, 2226 pivots=self._parse_pivots(), 2227 alias=self._parse_table_alias() if parse_alias else None, 2228 ) 2229 2230 def _parse_query_modifiers( 2231 self, this: t.Optional[exp.Expression] 2232 ) -> t.Optional[exp.Expression]: 2233 if isinstance(this, self.MODIFIABLES): 2234 for join in iter(self._parse_join, None): 2235 this.append("joins", join) 2236 for lateral in iter(self._parse_lateral, None): 2237 this.append("laterals", lateral) 2238 2239 while True: 2240 if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False): 2241 parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type] 2242 key, expression = parser(self) 2243 2244 if expression: 2245 this.set(key, expression) 2246 if key == "limit": 2247 offset = expression.args.pop("offset", None) 2248 if offset: 2249 this.set("offset", exp.Offset(expression=offset)) 2250 continue 2251 break 2252 return this 2253 2254 def _parse_hint(self) -> 
t.Optional[exp.Hint]: 2255 if self._match(TokenType.HINT): 2256 hints = [] 2257 for hint in iter(lambda: self._parse_csv(self._parse_function), []): 2258 hints.extend(hint) 2259 2260 if not self._match_pair(TokenType.STAR, TokenType.SLASH): 2261 self.raise_error("Expected */ after HINT") 2262 2263 return self.expression(exp.Hint, expressions=hints) 2264 2265 return None 2266 2267 def _parse_into(self) -> t.Optional[exp.Into]: 2268 if not self._match(TokenType.INTO): 2269 return None 2270 2271 temp = self._match(TokenType.TEMPORARY) 2272 unlogged = self._match_text_seq("UNLOGGED") 2273 self._match(TokenType.TABLE) 2274 2275 return self.expression( 2276 exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged 2277 ) 2278 2279 def _parse_from( 2280 self, joins: bool = False, skip_from_token: bool = False 2281 ) -> t.Optional[exp.From]: 2282 if not skip_from_token and not self._match(TokenType.FROM): 2283 return None 2284 2285 return self.expression( 2286 exp.From, comments=self._prev_comments, this=self._parse_table(joins=joins) 2287 ) 2288 2289 def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]: 2290 if not self._match(TokenType.MATCH_RECOGNIZE): 2291 return None 2292 2293 self._match_l_paren() 2294 2295 partition = self._parse_partition_by() 2296 order = self._parse_order() 2297 measures = self._parse_expressions() if self._match_text_seq("MEASURES") else None 2298 2299 if self._match_text_seq("ONE", "ROW", "PER", "MATCH"): 2300 rows = exp.var("ONE ROW PER MATCH") 2301 elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"): 2302 text = "ALL ROWS PER MATCH" 2303 if self._match_text_seq("SHOW", "EMPTY", "MATCHES"): 2304 text += f" SHOW EMPTY MATCHES" 2305 elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"): 2306 text += f" OMIT EMPTY MATCHES" 2307 elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"): 2308 text += f" WITH UNMATCHED ROWS" 2309 rows = exp.var(text) 2310 else: 2311 rows = None 2312 2313 if 
self._match_text_seq("AFTER", "MATCH", "SKIP"): 2314 text = "AFTER MATCH SKIP" 2315 if self._match_text_seq("PAST", "LAST", "ROW"): 2316 text += f" PAST LAST ROW" 2317 elif self._match_text_seq("TO", "NEXT", "ROW"): 2318 text += f" TO NEXT ROW" 2319 elif self._match_text_seq("TO", "FIRST"): 2320 text += f" TO FIRST {self._advance_any().text}" # type: ignore 2321 elif self._match_text_seq("TO", "LAST"): 2322 text += f" TO LAST {self._advance_any().text}" # type: ignore 2323 after = exp.var(text) 2324 else: 2325 after = None 2326 2327 if self._match_text_seq("PATTERN"): 2328 self._match_l_paren() 2329 2330 if not self._curr: 2331 self.raise_error("Expecting )", self._curr) 2332 2333 paren = 1 2334 start = self._curr 2335 2336 while self._curr and paren > 0: 2337 if self._curr.token_type == TokenType.L_PAREN: 2338 paren += 1 2339 if self._curr.token_type == TokenType.R_PAREN: 2340 paren -= 1 2341 2342 end = self._prev 2343 self._advance() 2344 2345 if paren > 0: 2346 self.raise_error("Expecting )", self._curr) 2347 2348 pattern = exp.var(self._find_sql(start, end)) 2349 else: 2350 pattern = None 2351 2352 define = ( 2353 self._parse_csv( 2354 lambda: self.expression( 2355 exp.Alias, 2356 alias=self._parse_id_var(any_token=True), 2357 this=self._match(TokenType.ALIAS) and self._parse_conjunction(), 2358 ) 2359 ) 2360 if self._match_text_seq("DEFINE") 2361 else None 2362 ) 2363 2364 self._match_r_paren() 2365 2366 return self.expression( 2367 exp.MatchRecognize, 2368 partition_by=partition, 2369 order=order, 2370 measures=measures, 2371 rows=rows, 2372 after=after, 2373 pattern=pattern, 2374 define=define, 2375 alias=self._parse_table_alias(), 2376 ) 2377 2378 def _parse_lateral(self) -> t.Optional[exp.Lateral]: 2379 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY) 2380 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY) 2381 2382 if outer_apply or cross_apply: 2383 this = self._parse_select(table=True) 2384 view = None 2385 outer = not 
cross_apply 2386 elif self._match(TokenType.LATERAL): 2387 this = self._parse_select(table=True) 2388 view = self._match(TokenType.VIEW) 2389 outer = self._match(TokenType.OUTER) 2390 else: 2391 return None 2392 2393 if not this: 2394 this = ( 2395 self._parse_unnest() 2396 or self._parse_function() 2397 or self._parse_id_var(any_token=False) 2398 ) 2399 2400 while self._match(TokenType.DOT): 2401 this = exp.Dot( 2402 this=this, 2403 expression=self._parse_function() or self._parse_id_var(any_token=False), 2404 ) 2405 2406 if view: 2407 table = self._parse_id_var(any_token=False) 2408 columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else [] 2409 table_alias: t.Optional[exp.TableAlias] = self.expression( 2410 exp.TableAlias, this=table, columns=columns 2411 ) 2412 elif isinstance(this, exp.Subquery) and this.alias: 2413 # Ensures parity between the Subquery's and the Lateral's "alias" args 2414 table_alias = this.args["alias"].copy() 2415 else: 2416 table_alias = self._parse_table_alias() 2417 2418 return self.expression(exp.Lateral, this=this, view=view, outer=outer, alias=table_alias) 2419 2420 def _parse_join_parts( 2421 self, 2422 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 2423 return ( 2424 self._match_set(self.JOIN_METHODS) and self._prev, 2425 self._match_set(self.JOIN_SIDES) and self._prev, 2426 self._match_set(self.JOIN_KINDS) and self._prev, 2427 ) 2428 2429 def _parse_join( 2430 self, skip_join_token: bool = False, parse_bracket: bool = False 2431 ) -> t.Optional[exp.Join]: 2432 if self._match(TokenType.COMMA): 2433 return self.expression(exp.Join, this=self._parse_table()) 2434 2435 index = self._index 2436 method, side, kind = self._parse_join_parts() 2437 hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None 2438 join = self._match(TokenType.JOIN) 2439 2440 if not skip_join_token and not join: 2441 self._retreat(index) 2442 kind = None 2443 method = None 2444 side = None 2445 
2446 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False) 2447 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False) 2448 2449 if not skip_join_token and not join and not outer_apply and not cross_apply: 2450 return None 2451 2452 if outer_apply: 2453 side = Token(TokenType.LEFT, "LEFT") 2454 2455 kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)} 2456 2457 if method: 2458 kwargs["method"] = method.text 2459 if side: 2460 kwargs["side"] = side.text 2461 if kind: 2462 kwargs["kind"] = kind.text 2463 if hint: 2464 kwargs["hint"] = hint 2465 2466 if self._match(TokenType.ON): 2467 kwargs["on"] = self._parse_conjunction() 2468 elif self._match(TokenType.USING): 2469 kwargs["using"] = self._parse_wrapped_id_vars() 2470 elif not (kind and kind.token_type == TokenType.CROSS): 2471 index = self._index 2472 join = self._parse_join() 2473 2474 if join and self._match(TokenType.ON): 2475 kwargs["on"] = self._parse_conjunction() 2476 elif join and self._match(TokenType.USING): 2477 kwargs["using"] = self._parse_wrapped_id_vars() 2478 else: 2479 join = None 2480 self._retreat(index) 2481 2482 kwargs["this"].set("joins", [join] if join else None) 2483 2484 comments = [c for token in (method, side, kind) if token for c in token.comments] 2485 return self.expression(exp.Join, comments=comments, **kwargs) 2486 2487 def _parse_opclass(self) -> t.Optional[exp.Expression]: 2488 this = self._parse_conjunction() 2489 if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False): 2490 return this 2491 2492 opclass = self._parse_var(any_token=True) 2493 if opclass: 2494 return self.expression(exp.Opclass, this=this, expression=opclass) 2495 2496 return this 2497 2498 def _parse_index( 2499 self, 2500 index: t.Optional[exp.Expression] = None, 2501 ) -> t.Optional[exp.Index]: 2502 if index: 2503 unique = None 2504 primary = None 2505 amp = None 2506 2507 self._match(TokenType.ON) 2508 self._match(TokenType.TABLE) # 
hive 2509 table = self._parse_table_parts(schema=True) 2510 else: 2511 unique = self._match(TokenType.UNIQUE) 2512 primary = self._match_text_seq("PRIMARY") 2513 amp = self._match_text_seq("AMP") 2514 2515 if not self._match(TokenType.INDEX): 2516 return None 2517 2518 index = self._parse_id_var() 2519 table = None 2520 2521 using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None 2522 2523 if self._match(TokenType.L_PAREN, advance=False): 2524 columns = self._parse_wrapped_csv(lambda: self._parse_ordered(self._parse_opclass)) 2525 else: 2526 columns = None 2527 2528 return self.expression( 2529 exp.Index, 2530 this=index, 2531 table=table, 2532 using=using, 2533 columns=columns, 2534 unique=unique, 2535 primary=primary, 2536 amp=amp, 2537 partition_by=self._parse_partition_by(), 2538 where=self._parse_where(), 2539 ) 2540 2541 def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]: 2542 hints: t.List[exp.Expression] = [] 2543 if self._match_pair(TokenType.WITH, TokenType.L_PAREN): 2544 # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16 2545 hints.append( 2546 self.expression( 2547 exp.WithTableHint, 2548 expressions=self._parse_csv( 2549 lambda: self._parse_function() or self._parse_var(any_token=True) 2550 ), 2551 ) 2552 ) 2553 self._match_r_paren() 2554 else: 2555 # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html 2556 while self._match_set(self.TABLE_INDEX_HINT_TOKENS): 2557 hint = exp.IndexTableHint(this=self._prev.text.upper()) 2558 2559 self._match_texts({"INDEX", "KEY"}) 2560 if self._match(TokenType.FOR): 2561 hint.set("target", self._advance_any() and self._prev.text.upper()) 2562 2563 hint.set("expressions", self._parse_wrapped_id_vars()) 2564 hints.append(hint) 2565 2566 return hints or None 2567 2568 def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]: 2569 return ( 2570 (not schema and self._parse_function(optional_parens=False)) 
2571 or self._parse_id_var(any_token=False) 2572 or self._parse_string_as_identifier() 2573 or self._parse_placeholder() 2574 ) 2575 2576 def _parse_table_parts(self, schema: bool = False) -> exp.Table: 2577 catalog = None 2578 db = None 2579 table = self._parse_table_part(schema=schema) 2580 2581 while self._match(TokenType.DOT): 2582 if catalog: 2583 # This allows nesting the table in arbitrarily many dot expressions if needed 2584 table = self.expression( 2585 exp.Dot, this=table, expression=self._parse_table_part(schema=schema) 2586 ) 2587 else: 2588 catalog = db 2589 db = table 2590 table = self._parse_table_part(schema=schema) 2591 2592 if not table: 2593 self.raise_error(f"Expected table name but got {self._curr}") 2594 2595 return self.expression( 2596 exp.Table, this=table, db=db, catalog=catalog, pivots=self._parse_pivots() 2597 ) 2598 2599 def _parse_table( 2600 self, 2601 schema: bool = False, 2602 joins: bool = False, 2603 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 2604 parse_bracket: bool = False, 2605 ) -> t.Optional[exp.Expression]: 2606 lateral = self._parse_lateral() 2607 if lateral: 2608 return lateral 2609 2610 unnest = self._parse_unnest() 2611 if unnest: 2612 return unnest 2613 2614 values = self._parse_derived_table_values() 2615 if values: 2616 return values 2617 2618 subquery = self._parse_select(table=True) 2619 if subquery: 2620 if not subquery.args.get("pivots"): 2621 subquery.set("pivots", self._parse_pivots()) 2622 return subquery 2623 2624 bracket = parse_bracket and self._parse_bracket(None) 2625 bracket = self.expression(exp.Table, this=bracket) if bracket else None 2626 this: exp.Expression = bracket or self._parse_table_parts(schema=schema) 2627 2628 if schema: 2629 return self._parse_schema(this=this) 2630 2631 version = self._parse_version() 2632 2633 if version: 2634 this.set("version", version) 2635 2636 if self.ALIAS_POST_TABLESAMPLE: 2637 table_sample = self._parse_table_sample() 2638 2639 alias = 
self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 2640 if alias: 2641 this.set("alias", alias) 2642 2643 this.set("hints", self._parse_table_hints()) 2644 2645 if not this.args.get("pivots"): 2646 this.set("pivots", self._parse_pivots()) 2647 2648 if not self.ALIAS_POST_TABLESAMPLE: 2649 table_sample = self._parse_table_sample() 2650 2651 if table_sample: 2652 table_sample.set("this", this) 2653 this = table_sample 2654 2655 if joins: 2656 for join in iter(self._parse_join, None): 2657 this.append("joins", join) 2658 2659 return this 2660 2661 def _parse_version(self) -> t.Optional[exp.Version]: 2662 if self._match(TokenType.TIMESTAMP_SNAPSHOT): 2663 this = "TIMESTAMP" 2664 elif self._match(TokenType.VERSION_SNAPSHOT): 2665 this = "VERSION" 2666 else: 2667 return None 2668 2669 if self._match_set((TokenType.FROM, TokenType.BETWEEN)): 2670 kind = self._prev.text.upper() 2671 start = self._parse_bitwise() 2672 self._match_texts(("TO", "AND")) 2673 end = self._parse_bitwise() 2674 expression: t.Optional[exp.Expression] = self.expression( 2675 exp.Tuple, expressions=[start, end] 2676 ) 2677 elif self._match_text_seq("CONTAINED", "IN"): 2678 kind = "CONTAINED IN" 2679 expression = self.expression( 2680 exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise) 2681 ) 2682 elif self._match(TokenType.ALL): 2683 kind = "ALL" 2684 expression = None 2685 else: 2686 self._match_text_seq("AS", "OF") 2687 kind = "AS OF" 2688 expression = self._parse_type() 2689 2690 return self.expression(exp.Version, this=this, expression=expression, kind=kind) 2691 2692 def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]: 2693 if not self._match(TokenType.UNNEST): 2694 return None 2695 2696 expressions = self._parse_wrapped_csv(self._parse_type) 2697 offset = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 2698 2699 alias = self._parse_table_alias() if with_alias else None 2700 2701 if alias: 2702 if self.UNNEST_COLUMN_ONLY: 
2703 if alias.args.get("columns"): 2704 self.raise_error("Unexpected extra column alias in unnest.") 2705 2706 alias.set("columns", [alias.this]) 2707 alias.set("this", None) 2708 2709 columns = alias.args.get("columns") or [] 2710 if offset and len(expressions) < len(columns): 2711 offset = columns.pop() 2712 2713 if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET): 2714 self._match(TokenType.ALIAS) 2715 offset = self._parse_id_var( 2716 any_token=False, tokens=self.UNNEST_OFFSET_ALIAS_TOKENS 2717 ) or exp.to_identifier("offset") 2718 2719 return self.expression(exp.Unnest, expressions=expressions, alias=alias, offset=offset) 2720 2721 def _parse_derived_table_values(self) -> t.Optional[exp.Values]: 2722 is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES) 2723 if not is_derived and not self._match(TokenType.VALUES): 2724 return None 2725 2726 expressions = self._parse_csv(self._parse_value) 2727 alias = self._parse_table_alias() 2728 2729 if is_derived: 2730 self._match_r_paren() 2731 2732 return self.expression( 2733 exp.Values, expressions=expressions, alias=alias or self._parse_table_alias() 2734 ) 2735 2736 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 2737 if not self._match(TokenType.TABLE_SAMPLE) and not ( 2738 as_modifier and self._match_text_seq("USING", "SAMPLE") 2739 ): 2740 return None 2741 2742 bucket_numerator = None 2743 bucket_denominator = None 2744 bucket_field = None 2745 percent = None 2746 rows = None 2747 size = None 2748 seed = None 2749 2750 kind = ( 2751 self._prev.text if self._prev.token_type == TokenType.TABLE_SAMPLE else "USING SAMPLE" 2752 ) 2753 method = self._parse_var(tokens=(TokenType.ROW,)) 2754 2755 matched_l_paren = self._match(TokenType.L_PAREN) 2756 2757 if self.TABLESAMPLE_CSV: 2758 num = None 2759 expressions = self._parse_csv(self._parse_primary) 2760 else: 2761 expressions = None 2762 num = ( 2763 self._parse_factor() 2764 if 
self._match(TokenType.NUMBER, advance=False) 2765 else self._parse_primary() 2766 ) 2767 2768 if self._match_text_seq("BUCKET"): 2769 bucket_numerator = self._parse_number() 2770 self._match_text_seq("OUT", "OF") 2771 bucket_denominator = bucket_denominator = self._parse_number() 2772 self._match(TokenType.ON) 2773 bucket_field = self._parse_field() 2774 elif self._match_set((TokenType.PERCENT, TokenType.MOD)): 2775 percent = num 2776 elif self._match(TokenType.ROWS): 2777 rows = num 2778 elif num: 2779 size = num 2780 2781 if matched_l_paren: 2782 self._match_r_paren() 2783 2784 if self._match(TokenType.L_PAREN): 2785 method = self._parse_var() 2786 seed = self._match(TokenType.COMMA) and self._parse_number() 2787 self._match_r_paren() 2788 elif self._match_texts(("SEED", "REPEATABLE")): 2789 seed = self._parse_wrapped(self._parse_number) 2790 2791 return self.expression( 2792 exp.TableSample, 2793 expressions=expressions, 2794 method=method, 2795 bucket_numerator=bucket_numerator, 2796 bucket_denominator=bucket_denominator, 2797 bucket_field=bucket_field, 2798 percent=percent, 2799 rows=rows, 2800 size=size, 2801 seed=seed, 2802 kind=kind, 2803 ) 2804 2805 def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]: 2806 return list(iter(self._parse_pivot, None)) or None 2807 2808 def _parse_joins(self) -> t.Optional[t.List[exp.Join]]: 2809 return list(iter(self._parse_join, None)) or None 2810 2811 # https://duckdb.org/docs/sql/statements/pivot 2812 def _parse_simplified_pivot(self) -> exp.Pivot: 2813 def _parse_on() -> t.Optional[exp.Expression]: 2814 this = self._parse_bitwise() 2815 return self._parse_in(this) if self._match(TokenType.IN) else this 2816 2817 this = self._parse_table() 2818 expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on) 2819 using = self._match(TokenType.USING) and self._parse_csv( 2820 lambda: self._parse_alias(self._parse_function()) 2821 ) 2822 group = self._parse_group() 2823 return self.expression( 2824 exp.Pivot, 
    def _parse_pivot(self) -> t.Optional[exp.Pivot]:
        """Parse a standard PIVOT/UNPIVOT table operator.

        Backtracks (via _retreat) and returns None when the construct is not a
        complete pivot, e.g. PIVOT not followed by '('. For PIVOT, also
        synthesizes the output column identifiers from the aggregations and
        the IN-list values.
        """
        index = self._index
        include_nulls = None

        if self._match(TokenType.PIVOT):
            unpivot = False
        elif self._match(TokenType.UNPIVOT):
            unpivot = True

            # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax
            if self._match_text_seq("INCLUDE", "NULLS"):
                include_nulls = True
            elif self._match_text_seq("EXCLUDE", "NULLS"):
                include_nulls = False
        else:
            return None

        expressions = []
        field = None

        if not self._match(TokenType.L_PAREN):
            # Not actually a pivot clause; undo token consumption.
            self._retreat(index)
            return None

        if unpivot:
            expressions = self._parse_csv(self._parse_column)
        else:
            expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function()))

        if not expressions:
            self.raise_error("Failed to parse PIVOT's aggregation list")

        if not self._match(TokenType.FOR):
            self.raise_error("Expecting FOR")

        value = self._parse_column()

        if not self._match(TokenType.IN):
            self.raise_error("Expecting IN")

        field = self._parse_in(value, alias=True)

        self._match_r_paren()

        pivot = self.expression(
            exp.Pivot,
            expressions=expressions,
            field=field,
            unpivot=unpivot,
            include_nulls=include_nulls,
        )

        # Only the last pivot in a chain may carry a table alias.
        if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False):
            pivot.set("alias", self._parse_table_alias())

        if not unpivot:
            names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions))

            # Build the pivoted output column names; dialect flags control
            # whether the aggregation alias prefixes or suffixes the value.
            columns: t.List[exp.Expression] = []
            for fld in pivot.args["field"].expressions:
                field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name
                for name in names:
                    if self.PREFIXED_PIVOT_COLUMNS:
                        name = f"{name}_{field_name}" if name else field_name
                    else:
                        name = f"{field_name}_{name}" if name else field_name

                    columns.append(exp.to_identifier(name))

            pivot.set("columns", columns)

        return pivot

    def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]:
        """Return the alias of each pivot aggregation (dialects may override)."""
        return [agg.alias for agg in aggregations]

    def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]:
        """Parse a WHERE clause; None when the WHERE keyword is absent."""
        if not skip_where_token and not self._match(TokenType.WHERE):
            return None

        return self.expression(
            exp.Where, comments=self._prev_comments, this=self._parse_conjunction()
        )

    def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]:
        """Parse a GROUP BY clause, including GROUPING SETS, ROLLUP, CUBE,
        WITH TOTALS and GROUP BY ALL; None when GROUP BY is absent."""
        if not skip_group_by_token and not self._match(TokenType.GROUP_BY):
            return None

        elements = defaultdict(list)

        if self._match(TokenType.ALL):
            return self.expression(exp.Group, all=True)

        # Loop because plain expressions and grouping constructs may be
        # interleaved; stop once a pass finds no grouping construct.
        while True:
            expressions = self._parse_csv(self._parse_conjunction)
            if expressions:
                elements["expressions"].extend(expressions)

            grouping_sets = self._parse_grouping_sets()
            if grouping_sets:
                elements["grouping_sets"].extend(grouping_sets)

            rollup = None
            cube = None
            totals = None

            with_ = self._match(TokenType.WITH)
            if self._match(TokenType.ROLLUP):
                # `WITH ROLLUP` stores True; `ROLLUP (...)` stores the columns.
                rollup = with_ or self._parse_wrapped_csv(self._parse_column)
                elements["rollup"].extend(ensure_list(rollup))

            if self._match(TokenType.CUBE):
                cube = with_ or self._parse_wrapped_csv(self._parse_column)
                elements["cube"].extend(ensure_list(cube))

            if self._match_text_seq("TOTALS"):
                totals = True
                elements["totals"] = True  # type: ignore

            if not (grouping_sets or rollup or cube or totals):
                break

        return self.expression(exp.Group, **elements)  # type: ignore

    def _parse_grouping_sets(self) -> t.Optional[t.List[exp.Expression]]:
        """Parse `GROUPING SETS (...)`; None when the keyword is absent."""
        if not self._match(TokenType.GROUPING_SETS):
            return None

        return self._parse_wrapped_csv(self._parse_grouping_set)
    def _parse_grouping_set(self) -> t.Optional[exp.Expression]:
        """Parse one grouping set: either a parenthesized column tuple or a
        single column."""
        if self._match(TokenType.L_PAREN):
            grouping_set = self._parse_csv(self._parse_column)
            self._match_r_paren()
            return self.expression(exp.Tuple, expressions=grouping_set)

        return self._parse_column()

    def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]:
        """Parse a HAVING clause; None when the keyword is absent."""
        if not skip_having_token and not self._match(TokenType.HAVING):
            return None
        return self.expression(exp.Having, this=self._parse_conjunction())

    def _parse_qualify(self) -> t.Optional[exp.Qualify]:
        """Parse a QUALIFY clause; None when the keyword is absent."""
        if not self._match(TokenType.QUALIFY):
            return None
        return self.expression(exp.Qualify, this=self._parse_conjunction())

    def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]:
        """Parse Oracle-style `[START WITH ...] CONNECT BY ...` (either order).

        While parsing the CONNECT BY condition, a PRIOR parser is temporarily
        registered so `PRIOR expr` is recognized only in that scope.
        """
        if skip_start_token:
            start = None
        elif self._match(TokenType.START_WITH):
            start = self._parse_conjunction()
        else:
            return None

        self._match(TokenType.CONNECT_BY)
        # NOTE(review): this mutates the class-level NO_PAREN_FUNCTION_PARSERS
        # dict, so it is visible to all Parser instances while the condition is
        # being parsed — presumably fine single-threaded; verify if parsers are
        # ever shared across threads.
        self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression(
            exp.Prior, this=self._parse_bitwise()
        )
        connect = self._parse_conjunction()
        self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR")

        # START WITH may also follow CONNECT BY.
        if not start and self._match(TokenType.START_WITH):
            start = self._parse_conjunction()

        return self.expression(exp.Connect, start=start, connect=connect)

    def _parse_order(
        self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse an ORDER BY clause wrapping `this`; returns `this` unchanged
        when ORDER BY is absent."""
        if not skip_order_token and not self._match(TokenType.ORDER_BY):
            return this

        return self.expression(
            exp.Order, this=this, expressions=self._parse_csv(self._parse_ordered)
        )

    def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]:
        """Generic helper for SORT BY / CLUSTER BY / DISTRIBUTE BY-style
        clauses introduced by `token` and producing `exp_class`."""
        if not self._match(token):
            return None
        return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered))

    def _parse_ordered(self, parse_method: t.Optional[t.Callable] = None) -> exp.Ordered:
        """Parse one ORDER BY term with optional ASC/DESC and NULLS FIRST/LAST.

        `desc` is deliberately tri-state: True (explicit DESC), False
        (explicit ASC, via `asc and False`), or None (no direction given), so
        generators can preserve an explicit ASC.
        """
        this = parse_method() if parse_method else self._parse_conjunction()

        asc = self._match(TokenType.ASC)
        desc = self._match(TokenType.DESC) or (asc and False)

        is_nulls_first = self._match_text_seq("NULLS", "FIRST")
        is_nulls_last = self._match_text_seq("NULLS", "LAST")

        nulls_first = is_nulls_first or False
        explicitly_null_ordered = is_nulls_first or is_nulls_last

        # Infer the dialect's implicit null ordering when none was written.
        if (
            not explicitly_null_ordered
            and (
                (not desc and self.NULL_ORDERING == "nulls_are_small")
                or (desc and self.NULL_ORDERING != "nulls_are_small")
            )
            and self.NULL_ORDERING != "nulls_are_last"
        ):
            nulls_first = True

        return self.expression(exp.Ordered, this=this, desc=desc, nulls_first=nulls_first)

    def _parse_limit(
        self, this: t.Optional[exp.Expression] = None, top: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse LIMIT (or TOP when `top` is True) and FETCH FIRST/NEXT.

        Returns `this` unchanged when no limit clause is present. The MySQL
        `LIMIT offset, count` comma form is also handled.
        """
        if self._match(TokenType.TOP if top else TokenType.LIMIT):
            comments = self._prev_comments
            if top:
                limit_paren = self._match(TokenType.L_PAREN)
                expression = self._parse_number()

                if limit_paren:
                    self._match_r_paren()
            else:
                expression = self._parse_term()

            if self._match(TokenType.COMMA):
                # LIMIT a, b => offset a, count b
                offset = expression
                expression = self._parse_term()
            else:
                offset = None

            limit_exp = self.expression(
                exp.Limit, this=this, expression=expression, offset=offset, comments=comments
            )

            return limit_exp

        if self._match(TokenType.FETCH):
            direction = self._match_set((TokenType.FIRST, TokenType.NEXT))
            direction = self._prev.text if direction else "FIRST"

            count = self._parse_field(tokens=self.FETCH_TOKENS)
            percent = self._match(TokenType.PERCENT)

            self._match_set((TokenType.ROW, TokenType.ROWS))

            only = self._match_text_seq("ONLY")
            with_ties = self._match_text_seq("WITH", "TIES")

            if only and with_ties:
                self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause")

            return self.expression(
                exp.Fetch,
                direction=direction,
                count=count,
                percent=percent,
                with_ties=with_ties,
            )

        return this
    def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        """Parse an OFFSET clause wrapping `this`; returns `this` when absent."""
        if not self._match(TokenType.OFFSET):
            return this

        count = self._parse_term()
        self._match_set((TokenType.ROW, TokenType.ROWS))
        return self.expression(exp.Offset, this=this, expression=count)

    def _parse_locks(self) -> t.List[exp.Lock]:
        """Parse trailing locking clauses: FOR UPDATE / FOR SHARE /
        LOCK IN SHARE MODE, each with optional OF tables and
        NOWAIT / WAIT n / SKIP LOCKED."""
        locks = []
        while True:
            if self._match_text_seq("FOR", "UPDATE"):
                update = True
            elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq(
                "LOCK", "IN", "SHARE", "MODE"
            ):
                update = False
            else:
                break

            expressions = None
            if self._match_text_seq("OF"):
                expressions = self._parse_csv(lambda: self._parse_table(schema=True))

            # wait: True => NOWAIT, expression => WAIT <n>, False => SKIP LOCKED
            wait: t.Optional[bool | exp.Expression] = None
            if self._match_text_seq("NOWAIT"):
                wait = True
            elif self._match_text_seq("WAIT"):
                wait = self._parse_primary()
            elif self._match_text_seq("SKIP", "LOCKED"):
                wait = False

            locks.append(
                self.expression(exp.Lock, update=update, expressions=expressions, wait=wait)
            )

        return locks

    def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse UNION / EXCEPT / INTERSECT chains, recursing on the right-hand
        side; returns `this` when no set operator follows."""
        if not self._match_set(self.SET_OPERATIONS):
            return this

        token_type = self._prev.token_type

        if token_type == TokenType.UNION:
            expression = exp.Union
        elif token_type == TokenType.EXCEPT:
            expression = exp.Except
        else:
            expression = exp.Intersect

        return self.expression(
            expression,
            this=this,
            # DISTINCT is the default unless ALL is written explicitly.
            distinct=self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL),
            by_name=self._match_text_seq("BY", "NAME"),
            expression=self._parse_set_operations(self._parse_select(nested=True)),
        )

    def _parse_expression(self) -> t.Optional[exp.Expression]:
        """Parse a full expression, including an optional trailing alias."""
        return self._parse_alias(self._parse_conjunction())

    def _parse_conjunction(self) -> t.Optional[exp.Expression]:
        """Parse AND/OR chains (lowest-precedence boolean operators)."""
        return self._parse_tokens(self._parse_equality, self.CONJUNCTION)

    def _parse_equality(self) -> t.Optional[exp.Expression]:
        """Parse equality operators (=, <>, ...)."""
        return self._parse_tokens(self._parse_comparison, self.EQUALITY)

    def _parse_comparison(self) -> t.Optional[exp.Expression]:
        """Parse comparison operators (<, >, <=, >=, ...)."""
        return self._parse_tokens(self._parse_range, self.COMPARISON)

    def _parse_range(self) -> t.Optional[exp.Expression]:
        """Parse range-style predicates: BETWEEN/IN/LIKE (via RANGE_PARSERS),
        ISNULL/NOTNULL shorthands, `NOT <range>`, and trailing IS tests."""
        this = self._parse_bitwise()
        negate = self._match(TokenType.NOT)

        if self._match_set(self.RANGE_PARSERS):
            expression = self.RANGE_PARSERS[self._prev.token_type](self, this)
            if not expression:
                return this

            this = expression
        elif self._match(TokenType.ISNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())

        # Postgres supports ISNULL and NOTNULL for conditions.
        # https://blog.andreiavram.ro/postgresql-null-composite-type/
        if self._match(TokenType.NOTNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())
            this = self.expression(exp.Not, this=this)

        if negate:
            this = self.expression(exp.Not, this=this)

        if self._match(TokenType.IS):
            this = self._parse_is(this)

        return this
    def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse the tail of an IS predicate: `[NOT] DISTINCT FROM`, NULL, or
        a boolean literal. Backtracks (including the already-consumed IS) when
        no valid operand follows."""
        index = self._index - 1
        negate = self._match(TokenType.NOT)

        if self._match_text_seq("DISTINCT", "FROM"):
            # IS NOT DISTINCT FROM => null-safe equality.
            klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ
            return self.expression(klass, this=this, expression=self._parse_conjunction())

        expression = self._parse_null() or self._parse_boolean()
        if not expression:
            self._retreat(index)
            return None

        this = self.expression(exp.Is, this=this, expression=expression)
        return self.expression(exp.Not, this=this) if negate else this

    def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In:
        """Parse the operand of IN: an UNNEST call, a parenthesized subquery
        or expression list, or a bare field."""
        unnest = self._parse_unnest(with_alias=False)
        if unnest:
            this = self.expression(exp.In, this=this, unnest=unnest)
        elif self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias))

            # A single subquery becomes `query`, otherwise a value list.
            if len(expressions) == 1 and isinstance(expressions[0], exp.Subqueryable):
                this = self.expression(exp.In, this=this, query=expressions[0])
            else:
                this = self.expression(exp.In, this=this, expressions=expressions)

            self._match_r_paren(this)
        else:
            this = self.expression(exp.In, this=this, field=self._parse_field())

        return this

    def _parse_between(self, this: exp.Expression) -> exp.Between:
        """Parse `BETWEEN low AND high` (BETWEEN already consumed)."""
        low = self._parse_bitwise()
        self._match(TokenType.AND)
        high = self._parse_bitwise()
        return self.expression(exp.Between, this=this, low=low, high=high)

    def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse an optional `ESCAPE '<char>'` suffix (e.g. after LIKE)."""
        if not self._match(TokenType.ESCAPE):
            return this
        return self.expression(exp.Escape, this=this, expression=self._parse_string())

    def _parse_interval(self) -> t.Optional[exp.Interval]:
        """Parse an INTERVAL literal, canonicalizing toward the
        `INTERVAL '<n>' <unit>` form so transpilation is uniform."""
        index = self._index

        if not self._match(TokenType.INTERVAL):
            return None

        if self._match(TokenType.STRING, advance=False):
            this = self._parse_primary()
        else:
            this = self._parse_term()

        if not this:
            self._retreat(index)
            return None

        unit = self._parse_function() or self._parse_var(any_token=True)

        # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse
        # each INTERVAL expression into this canonical form so it's easy to transpile
        if this and this.is_number:
            this = exp.Literal.string(this.name)
        elif this and this.is_string:
            parts = this.name.split()

            if len(parts) == 2:
                if unit:
                    # This is not actually a unit, it's something else (e.g. a "window side")
                    unit = None
                    self._retreat(self._index - 1)

                # Split e.g. '5 day' into value '5' and unit `day`.
                this = exp.Literal.string(parts[0])
                unit = self.expression(exp.Var, this=parts[1])

        return self.expression(exp.Interval, this=this, unit=unit)
    def _parse_bitwise(self) -> t.Optional[exp.Expression]:
        """Parse bitwise operators, `??` (coalesce shorthand), and the
        `<<` / `>>` shift pairs."""
        this = self._parse_term()

        while True:
            if self._match_set(self.BITWISE):
                this = self.expression(
                    self.BITWISE[self._prev.token_type],
                    this=this,
                    expression=self._parse_term(),
                )
            elif self._match(TokenType.DQMARK):
                this = self.expression(exp.Coalesce, this=this, expressions=self._parse_term())
            elif self._match_pair(TokenType.LT, TokenType.LT):
                this = self.expression(
                    exp.BitwiseLeftShift, this=this, expression=self._parse_term()
                )
            elif self._match_pair(TokenType.GT, TokenType.GT):
                this = self.expression(
                    exp.BitwiseRightShift, this=this, expression=self._parse_term()
                )
            else:
                break

        return this

    def _parse_term(self) -> t.Optional[exp.Expression]:
        """Parse additive-level operators (TERM token set)."""
        return self._parse_tokens(self._parse_factor, self.TERM)

    def _parse_factor(self) -> t.Optional[exp.Expression]:
        """Parse multiplicative-level operators (FACTOR token set)."""
        return self._parse_tokens(self._parse_unary, self.FACTOR)

    def _parse_unary(self) -> t.Optional[exp.Expression]:
        """Parse prefix unary operators, falling through to typed/column
        expressions with an optional AT TIME ZONE suffix."""
        if self._match_set(self.UNARY_PARSERS):
            return self.UNARY_PARSERS[self._prev.token_type](self)
        return self._parse_at_time_zone(self._parse_type())

    def _parse_type(self, parse_interval: bool = True) -> t.Optional[exp.Expression]:
        """Parse an expression that may start with a data type: an interval,
        a cast shorthand `<type> <literal>`, or a plain column expression.

        Backtracks when a parsed type turns out to be a bare identifier.
        """
        interval = parse_interval and self._parse_interval()
        if interval:
            return interval

        index = self._index
        data_type = self._parse_types(check_func=True, allow_identifiers=False)
        this = self._parse_column()

        if data_type:
            if isinstance(this, exp.Literal):
                # e.g. DATE '2020-01-01' — dialect-specific literal parsers win.
                parser = self.TYPE_LITERAL_PARSERS.get(data_type.this)
                if parser:
                    return parser(self, this, data_type)
                return self.expression(exp.Cast, this=this, to=data_type)
            if not data_type.expressions:
                self._retreat(index)
                return self._parse_column()
            return self._parse_column_ops(data_type)

        return this and self._parse_column_ops(this)

    def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]:
        """Parse one data-type parameter, e.g. the `10` in DECIMAL(10, 2)."""
        this = self._parse_type()
        if not this:
            return None

        return self.expression(
            exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True)
        )

    def _parse_types(
        self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True
    ) -> t.Optional[exp.Expression]:
        """Parse a data type, covering nested/struct/enum parameters, `<...>`
        generics, timezone-qualified timestamps, INTERVAL spans, UNSIGNED
        variants, user-defined types and trailing `[]` array suffixes.

        When `check_func` is set, a parameterized type followed by a string is
        rejected (it is likely a function call, e.g. CHAR(1) vs CHAR('x')).
        Backtracks and returns None when nothing type-like is found.
        """
        index = self._index

        prefix = self._match_text_seq("SYSUDTLIB", ".")

        if not self._match_set(self.TYPE_TOKENS):
            identifier = allow_identifiers and self._parse_id_var(
                any_token=False, tokens=(TokenType.VAR,)
            )

            if identifier:
                # Re-tokenize the identifier: quoted type names (e.g. "INT")
                # should still resolve to a type token.
                tokens = self._tokenizer.tokenize(identifier.name)

                if len(tokens) != 1:
                    self.raise_error("Unexpected identifier", self._prev)

                if tokens[0].token_type in self.TYPE_TOKENS:
                    self._prev = tokens[0]
                elif self.SUPPORTS_USER_DEFINED_TYPES:
                    type_name = identifier.name

                    while self._match(TokenType.DOT):
                        type_name = f"{type_name}.{self._advance_any() and self._prev.text}"

                    return exp.DataType.build(type_name, udt=True)
                else:
                    return None
            else:
                return None

        type_token = self._prev.token_type

        if type_token == TokenType.PSEUDO_TYPE:
            return self.expression(exp.PseudoType, this=self._prev.text)

        if type_token == TokenType.OBJECT_IDENTIFIER:
            return self.expression(exp.ObjectIdentifier, this=self._prev.text)

        nested = type_token in self.NESTED_TYPE_TOKENS
        is_struct = type_token in self.STRUCT_TYPE_TOKENS
        expressions = None
        maybe_func = False

        if self._match(TokenType.L_PAREN):
            if is_struct:
                expressions = self._parse_csv(self._parse_struct_types)
            elif nested:
                expressions = self._parse_csv(
                    lambda: self._parse_types(
                        check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                    )
                )
            elif type_token in self.ENUM_TYPE_TOKENS:
                expressions = self._parse_csv(self._parse_equality)
            else:
                expressions = self._parse_csv(self._parse_type_size)

            if not expressions or not self._match(TokenType.R_PAREN):
                self._retreat(index)
                return None

            maybe_func = True

        this: t.Optional[exp.Expression] = None
        values: t.Optional[t.List[exp.Expression]] = None

        if nested and self._match(TokenType.LT):
            if is_struct:
                expressions = self._parse_csv(self._parse_struct_types)
            else:
                expressions = self._parse_csv(
                    lambda: self._parse_types(
                        check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                    )
                )

            if not self._match(TokenType.GT):
                self.raise_error("Expecting >")

            if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)):
                values = self._parse_csv(self._parse_conjunction)
                self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN))

        if type_token in self.TIMESTAMPS:
            if self._match_text_seq("WITH", "TIME", "ZONE"):
                maybe_func = False
                tz_type = (
                    exp.DataType.Type.TIMETZ
                    if type_token in self.TIMES
                    else exp.DataType.Type.TIMESTAMPTZ
                )
                this = exp.DataType(this=tz_type, expressions=expressions)
            elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"):
                maybe_func = False
                this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions)
            elif self._match_text_seq("WITHOUT", "TIME", "ZONE"):
                maybe_func = False
        elif type_token == TokenType.INTERVAL:
            unit = self._parse_var()

            if self._match_text_seq("TO"):
                # e.g. INTERVAL YEAR TO MONTH
                span = [exp.IntervalSpan(this=unit, expression=self._parse_var())]
            else:
                span = None

            if span or not unit:
                this = self.expression(
                    exp.DataType, this=exp.DataType.Type.INTERVAL, expressions=span
                )
            else:
                this = self.expression(exp.Interval, unit=unit)

        if maybe_func and check_func:
            index2 = self._index
            peek = self._parse_string()

            if not peek:
                # Type-like token followed by a string: treat as a function.
                self._retreat(index)
                return None

            self._retreat(index2)

        if not this:
            if self._match_text_seq("UNSIGNED"):
                unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token)
                if not unsigned_type_token:
                    self.raise_error(f"Cannot convert {type_token.value} to unsigned.")

                type_token = unsigned_type_token or type_token

            this = exp.DataType(
                this=exp.DataType.Type[type_token.value],
                expressions=expressions,
                nested=nested,
                values=values,
                prefix=prefix,
            )

        # Trailing [] pairs wrap the type in ARRAY, e.g. INT[][].
        while self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET):
            this = exp.DataType(this=exp.DataType.Type.ARRAY, expressions=[this], nested=True)

        return this

    def _parse_struct_types(self) -> t.Optional[exp.Expression]:
        """Parse one STRUCT member: `name type` or `name: type`."""
        this = self._parse_type(parse_interval=False) or self._parse_id_var()
        self._match(TokenType.COLON)
        return self._parse_column_def(this)

    def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse an optional `AT TIME ZONE <expr>` suffix."""
        if not self._match_text_seq("AT", "TIME", "ZONE"):
            return this
        return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary())

    def _parse_column(self) -> t.Optional[exp.Expression]:
        """Parse a column reference, then any dot/cast/bracket operators."""
        this = self._parse_field()
        if isinstance(this, exp.Identifier):
            this = self.expression(exp.Column, this=this)
        elif not this:
            return self._parse_bracket(this)
        return self._parse_column_ops(this)

    def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Apply trailing column operators to `this`: brackets, `::` casts,
        dotted paths (building up table/db/catalog), and dotted function
        calls (BigQuery-style x.y.fn(...))."""
        this = self._parse_bracket(this)

        while self._match_set(self.COLUMN_OPERATORS):
            op_token = self._prev.token_type
            op = self.COLUMN_OPERATORS.get(op_token)

            if op_token == TokenType.DCOLON:
                field = self._parse_types()
                if not field:
                    self.raise_error("Expected type")
            elif op and self._curr:
                self._advance()
                value = self._prev.text
                field = (
                    exp.Literal.number(value)
                    if self._prev.token_type == TokenType.NUMBER
                    else exp.Literal.string(value)
                )
            else:
                field = self._parse_field(anonymous_func=True, any_token=True)

            if isinstance(field, exp.Func):
                # bigquery allows function calls like x.y.count(...)
                # SAFE.SUBSTR(...)
                # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules
                this = self._replace_columns_with_dots(this)

            if op:
                this = op(self, this, field)
            elif isinstance(this, exp.Column) and not this.args.get("catalog"):
                # Shift name parts left: column -> table -> db -> catalog.
                this = self.expression(
                    exp.Column,
                    this=field,
                    table=this.this,
                    db=this.args.get("table"),
                    catalog=this.args.get("db"),
                )
            else:
                this = self.expression(exp.Dot, this=this, expression=field)
            this = self._parse_bracket(this)
        return this
    def _parse_primary(self) -> t.Optional[exp.Expression]:
        """Parse a primary expression: a literal (with implicit adjacent
        string concatenation), a leading-dot number like `.5`, or a
        parenthesized subquery/tuple/expression."""
        if self._match_set(self.PRIMARY_PARSERS):
            token_type = self._prev.token_type
            primary = self.PRIMARY_PARSERS[token_type](self, self._prev)

            if token_type == TokenType.STRING:
                # Adjacent string literals concatenate: 'a' 'b' => 'ab'.
                expressions = [primary]
                while self._match(TokenType.STRING):
                    expressions.append(exp.Literal.string(self._prev.text))

                if len(expressions) > 1:
                    return self.expression(exp.Concat, expressions=expressions)

            return primary

        if self._match_pair(TokenType.DOT, TokenType.NUMBER):
            return exp.Literal.number(f"0.{self._prev.text}")

        if self._match(TokenType.L_PAREN):
            comments = self._prev_comments
            query = self._parse_select()

            if query:
                expressions = [query]
            else:
                expressions = self._parse_expressions()

            this = self._parse_query_modifiers(seq_get(expressions, 0))

            if isinstance(this, exp.Subqueryable):
                this = self._parse_set_operations(
                    self._parse_subquery(this=this, parse_alias=False)
                )
            elif len(expressions) > 1:
                this = self.expression(exp.Tuple, expressions=expressions)
            else:
                this = self.expression(exp.Paren, this=self._parse_set_operations(this))

            if this:
                this.add_comments(comments)

            self._match_r_paren(expression=this)
            return this

        return None

    def _parse_field(
        self,
        any_token: bool = False,
        tokens: t.Optional[t.Collection[TokenType]] = None,
        anonymous_func: bool = False,
    ) -> t.Optional[exp.Expression]:
        """Parse a field: a primary literal, a function call, or an
        identifier/variable (in that priority order)."""
        return (
            self._parse_primary()
            or self._parse_function(anonymous=anonymous_func)
            or self._parse_id_var(any_token=any_token, tokens=tokens)
        )

    def _parse_function(
        self,
        functions: t.Optional[t.Dict[str, t.Callable]] = None,
        anonymous: bool = False,
        optional_parens: bool = True,
    ) -> t.Optional[exp.Expression]:
        """Parse a function call.

        Resolution order: no-paren parsers (e.g. CURRENT_DATE), dedicated
        FUNCTION_PARSERS, subquery predicates (EXISTS/ANY...), known FUNCTIONS
        builders, then a generic exp.Anonymous. When `anonymous` is True the
        dedicated builders are skipped. Returns None if the current token
        cannot start a function.
        """
        if not self._curr:
            return None

        token_type = self._curr.token_type
        this = self._curr.text
        upper = this.upper()

        parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper)
        if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS:
            self._advance()
            return parser(self)

        if not self._next or self._next.token_type != TokenType.L_PAREN:
            if optional_parens and token_type in self.NO_PAREN_FUNCTIONS:
                self._advance()
                return self.expression(self.NO_PAREN_FUNCTIONS[token_type])

            return None

        if token_type not in self.FUNC_TOKENS:
            return None

        # Consume the function name and the opening paren.
        self._advance(2)

        parser = self.FUNCTION_PARSERS.get(upper)
        if parser and not anonymous:
            this = parser(self)
        else:
            subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type)

            if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH):
                this = self.expression(subquery_predicate, this=self._parse_select())
                self._match_r_paren()
                return this

            if functions is None:
                functions = self.FUNCTIONS

            function = functions.get(upper)

            alias = upper in self.FUNCTIONS_WITH_ALIASED_ARGS
            args = self._parse_csv(lambda: self._parse_lambda(alias=alias))

            if function and not anonymous:
                func = self.validate_expression(function(args), args)
                if not self.NORMALIZE_FUNCTIONS:
                    # Remember the original casing of the function name.
                    func.meta["name"] = this
                this = func
            else:
                this = self.expression(exp.Anonymous, this=this, expressions=args)

        self._match_r_paren(this)
        return self._parse_window(this)
    def _parse_function_parameter(self) -> t.Optional[exp.Expression]:
        """Parse one parameter in a UDF signature (name plus optional type)."""
        return self._parse_column_def(self._parse_id_var())

    def _parse_user_defined_function(
        self, kind: t.Optional[TokenType] = None
    ) -> t.Optional[exp.Expression]:
        """Parse a possibly dotted UDF name with an optional parameter list."""
        this = self._parse_id_var()

        while self._match(TokenType.DOT):
            this = self.expression(exp.Dot, this=this, expression=self._parse_id_var())

        if not self._match(TokenType.L_PAREN):
            return this

        expressions = self._parse_csv(self._parse_function_parameter)
        self._match_r_paren()
        return self.expression(
            exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True
        )

    def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier:
        """Parse a charset introducer (e.g. _utf8'...'); falls back to a bare
        identifier when no literal follows."""
        literal = self._parse_primary()
        if literal:
            return self.expression(exp.Introducer, this=token.text, expression=literal)

        return self.expression(exp.Identifier, this=token.text)

    def _parse_session_parameter(self) -> exp.SessionParameter:
        """Parse a session parameter reference, optionally `kind.name`."""
        kind = None
        this = self._parse_id_var() or self._parse_primary()

        if this and self._match(TokenType.DOT):
            kind = this.name
            this = self._parse_var() or self._parse_primary()

        return self.expression(exp.SessionParameter, this=this, kind=kind)

    def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]:
        """Parse a lambda (`(x, y) -> ...` or `x -> ...`); otherwise fall back
        to DISTINCT aggregates or a regular select/expression argument, with
        optional ORDER BY / LIMIT / IGNORE NULLS modifiers."""
        index = self._index

        if self._match(TokenType.L_PAREN):
            expressions = t.cast(
                t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_id_var)
            )

            if not self._match(TokenType.R_PAREN):
                self._retreat(index)
        else:
            expressions = [self._parse_id_var()]

        if self._match_set(self.LAMBDAS):
            return self.LAMBDAS[self._prev.token_type](self, expressions)

        # Not a lambda after all; rewind and parse as a normal argument.
        self._retreat(index)

        this: t.Optional[exp.Expression]

        if self._match(TokenType.DISTINCT):
            this = self.expression(
                exp.Distinct, expressions=self._parse_csv(self._parse_conjunction)
            )
        else:
            this = self._parse_select_or_expression(alias=alias)

        return self._parse_limit(self._parse_order(self._parse_respect_or_ignore_nulls(this)))

    def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        """Parse a parenthesized column/constraint list into exp.Schema;
        returns `this` unchanged when the next tokens form a nested SELECT or
        no paren follows."""
        index = self._index

        if not self.errors:
            try:
                if self._parse_select(nested=True):
                    return this
            except ParseError:
                pass
            finally:
                # The speculative select parse must not leave errors or
                # consumed tokens behind.
                self.errors.clear()
                self._retreat(index)

        if not self._match(TokenType.L_PAREN):
            return this

        args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def())

        self._match_r_paren()
        return self.expression(exp.Schema, this=this, expressions=args)

    def _parse_field_def(self) -> t.Optional[exp.Expression]:
        """Parse one schema field definition (column def over any field)."""
        return self._parse_column_def(self._parse_field(any_token=True))

    def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse a column definition: optional type, computed-column `AS`
        expression, and a run of column constraints. Returns the bare name
        when neither a type nor constraints are present."""
        # column defs are not really columns, they're identifiers
        if isinstance(this, exp.Column):
            this = this.this

        kind = self._parse_types(schema=True)

        if self._match_text_seq("FOR", "ORDINALITY"):
            return self.expression(exp.ColumnDef, this=this, ordinality=True)

        constraints: t.List[exp.Expression] = []

        if not kind and self._match(TokenType.ALIAS):
            constraints.append(
                self.expression(
                    exp.ComputedColumnConstraint,
                    this=self._parse_conjunction(),
                    persisted=self._match_text_seq("PERSISTED"),
                    not_null=self._match_pair(TokenType.NOT, TokenType.NULL),
                )
            )

        while True:
            constraint = self._parse_column_constraint()
            if not constraint:
                break
            constraints.append(constraint)

        if not kind and not constraints:
            return this

        return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints)
    def _parse_auto_increment(
        self,
    ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint:
        """Parse AUTO_INCREMENT, optionally with `(start, increment)` or
        `START ... INCREMENT ...` parameters; a parameterized form becomes a
        generated-identity constraint."""
        start = None
        increment = None

        if self._match(TokenType.L_PAREN, advance=False):
            args = self._parse_wrapped_csv(self._parse_bitwise)
            start = seq_get(args, 0)
            increment = seq_get(args, 1)
        elif self._match_text_seq("START"):
            start = self._parse_bitwise()
            self._match_text_seq("INCREMENT")
            increment = self._parse_bitwise()

        if start and increment:
            return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment)

        return exp.AutoIncrementColumnConstraint()

    def _parse_compress(self) -> exp.CompressColumnConstraint:
        """Parse a COMPRESS column constraint, with either a wrapped list of
        values or a single expression."""
        if self._match(TokenType.L_PAREN, advance=False):
            return self.expression(
                exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise)
            )

        return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise())
3804 3805 def _parse_generated_as_identity( 3806 self, 3807 ) -> exp.GeneratedAsIdentityColumnConstraint | exp.ComputedColumnConstraint: 3808 if self._match_text_seq("BY", "DEFAULT"): 3809 on_null = self._match_pair(TokenType.ON, TokenType.NULL) 3810 this = self.expression( 3811 exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null 3812 ) 3813 else: 3814 self._match_text_seq("ALWAYS") 3815 this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True) 3816 3817 self._match(TokenType.ALIAS) 3818 identity = self._match_text_seq("IDENTITY") 3819 3820 if self._match(TokenType.L_PAREN): 3821 if self._match(TokenType.START_WITH): 3822 this.set("start", self._parse_bitwise()) 3823 if self._match_text_seq("INCREMENT", "BY"): 3824 this.set("increment", self._parse_bitwise()) 3825 if self._match_text_seq("MINVALUE"): 3826 this.set("minvalue", self._parse_bitwise()) 3827 if self._match_text_seq("MAXVALUE"): 3828 this.set("maxvalue", self._parse_bitwise()) 3829 3830 if self._match_text_seq("CYCLE"): 3831 this.set("cycle", True) 3832 elif self._match_text_seq("NO", "CYCLE"): 3833 this.set("cycle", False) 3834 3835 if not identity: 3836 this.set("expression", self._parse_bitwise()) 3837 3838 self._match_r_paren() 3839 3840 return this 3841 3842 def _parse_inline(self) -> exp.InlineLengthColumnConstraint: 3843 self._match_text_seq("LENGTH") 3844 return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise()) 3845 3846 def _parse_not_constraint( 3847 self, 3848 ) -> t.Optional[exp.Expression]: 3849 if self._match_text_seq("NULL"): 3850 return self.expression(exp.NotNullColumnConstraint) 3851 if self._match_text_seq("CASESPECIFIC"): 3852 return self.expression(exp.CaseSpecificColumnConstraint, not_=True) 3853 if self._match_text_seq("FOR", "REPLICATION"): 3854 return self.expression(exp.NotForReplicationColumnConstraint) 3855 return None 3856 3857 def _parse_column_constraint(self) -> t.Optional[exp.Expression]: 3858 if 
self._match(TokenType.CONSTRAINT): 3859 this = self._parse_id_var() 3860 else: 3861 this = None 3862 3863 if self._match_texts(self.CONSTRAINT_PARSERS): 3864 return self.expression( 3865 exp.ColumnConstraint, 3866 this=this, 3867 kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self), 3868 ) 3869 3870 return this 3871 3872 def _parse_constraint(self) -> t.Optional[exp.Expression]: 3873 if not self._match(TokenType.CONSTRAINT): 3874 return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS) 3875 3876 this = self._parse_id_var() 3877 expressions = [] 3878 3879 while True: 3880 constraint = self._parse_unnamed_constraint() or self._parse_function() 3881 if not constraint: 3882 break 3883 expressions.append(constraint) 3884 3885 return self.expression(exp.Constraint, this=this, expressions=expressions) 3886 3887 def _parse_unnamed_constraint( 3888 self, constraints: t.Optional[t.Collection[str]] = None 3889 ) -> t.Optional[exp.Expression]: 3890 if not self._match_texts(constraints or self.CONSTRAINT_PARSERS): 3891 return None 3892 3893 constraint = self._prev.text.upper() 3894 if constraint not in self.CONSTRAINT_PARSERS: 3895 self.raise_error(f"No parser found for schema constraint {constraint}.") 3896 3897 return self.CONSTRAINT_PARSERS[constraint](self) 3898 3899 def _parse_unique(self) -> exp.UniqueColumnConstraint: 3900 self._match_text_seq("KEY") 3901 return self.expression( 3902 exp.UniqueColumnConstraint, 3903 this=self._parse_schema(self._parse_id_var(any_token=False)), 3904 index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text, 3905 ) 3906 3907 def _parse_key_constraint_options(self) -> t.List[str]: 3908 options = [] 3909 while True: 3910 if not self._curr: 3911 break 3912 3913 if self._match(TokenType.ON): 3914 action = None 3915 on = self._advance_any() and self._prev.text 3916 3917 if self._match_text_seq("NO", "ACTION"): 3918 action = "NO ACTION" 3919 elif self._match_text_seq("CASCADE"): 3920 
action = "CASCADE" 3921 elif self._match_text_seq("RESTRICT"): 3922 action = "RESTRICT" 3923 elif self._match_pair(TokenType.SET, TokenType.NULL): 3924 action = "SET NULL" 3925 elif self._match_pair(TokenType.SET, TokenType.DEFAULT): 3926 action = "SET DEFAULT" 3927 else: 3928 self.raise_error("Invalid key constraint") 3929 3930 options.append(f"ON {on} {action}") 3931 elif self._match_text_seq("NOT", "ENFORCED"): 3932 options.append("NOT ENFORCED") 3933 elif self._match_text_seq("DEFERRABLE"): 3934 options.append("DEFERRABLE") 3935 elif self._match_text_seq("INITIALLY", "DEFERRED"): 3936 options.append("INITIALLY DEFERRED") 3937 elif self._match_text_seq("NORELY"): 3938 options.append("NORELY") 3939 elif self._match_text_seq("MATCH", "FULL"): 3940 options.append("MATCH FULL") 3941 else: 3942 break 3943 3944 return options 3945 3946 def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]: 3947 if match and not self._match(TokenType.REFERENCES): 3948 return None 3949 3950 expressions = None 3951 this = self._parse_table(schema=True) 3952 options = self._parse_key_constraint_options() 3953 return self.expression(exp.Reference, this=this, expressions=expressions, options=options) 3954 3955 def _parse_foreign_key(self) -> exp.ForeignKey: 3956 expressions = self._parse_wrapped_id_vars() 3957 reference = self._parse_references() 3958 options = {} 3959 3960 while self._match(TokenType.ON): 3961 if not self._match_set((TokenType.DELETE, TokenType.UPDATE)): 3962 self.raise_error("Expected DELETE or UPDATE") 3963 3964 kind = self._prev.text.lower() 3965 3966 if self._match_text_seq("NO", "ACTION"): 3967 action = "NO ACTION" 3968 elif self._match(TokenType.SET): 3969 self._match_set((TokenType.NULL, TokenType.DEFAULT)) 3970 action = "SET " + self._prev.text.upper() 3971 else: 3972 self._advance() 3973 action = self._prev.text.upper() 3974 3975 options[kind] = action 3976 3977 return self.expression( 3978 exp.ForeignKey, expressions=expressions, 
reference=reference, **options # type: ignore 3979 ) 3980 3981 def _parse_primary_key_part(self) -> t.Optional[exp.Expression]: 3982 return self._parse_field() 3983 3984 def _parse_primary_key( 3985 self, wrapped_optional: bool = False, in_props: bool = False 3986 ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey: 3987 desc = ( 3988 self._match_set((TokenType.ASC, TokenType.DESC)) 3989 and self._prev.token_type == TokenType.DESC 3990 ) 3991 3992 if not in_props and not self._match(TokenType.L_PAREN, advance=False): 3993 return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc) 3994 3995 expressions = self._parse_wrapped_csv( 3996 self._parse_primary_key_part, optional=wrapped_optional 3997 ) 3998 options = self._parse_key_constraint_options() 3999 return self.expression(exp.PrimaryKey, expressions=expressions, options=options) 4000 4001 def _parse_bracket(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4002 if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)): 4003 return this 4004 4005 bracket_kind = self._prev.token_type 4006 4007 if self._match(TokenType.COLON): 4008 expressions: t.List[exp.Expression] = [ 4009 self.expression(exp.Slice, expression=self._parse_conjunction()) 4010 ] 4011 else: 4012 expressions = self._parse_csv( 4013 lambda: self._parse_slice( 4014 self._parse_alias(self._parse_conjunction(), explicit=True) 4015 ) 4016 ) 4017 4018 # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs 4019 if bracket_kind == TokenType.L_BRACE: 4020 this = self.expression(exp.Struct, expressions=expressions) 4021 elif not this or this.name.upper() == "ARRAY": 4022 this = self.expression(exp.Array, expressions=expressions) 4023 else: 4024 expressions = apply_index_offset(this, expressions, -self.INDEX_OFFSET) 4025 this = self.expression(exp.Bracket, this=this, expressions=expressions) 4026 4027 if not self._match(TokenType.R_BRACKET) and bracket_kind == TokenType.L_BRACKET: 4028 self.raise_error("Expected 
]") 4029 elif not self._match(TokenType.R_BRACE) and bracket_kind == TokenType.L_BRACE: 4030 self.raise_error("Expected }") 4031 4032 self._add_comments(this) 4033 return self._parse_bracket(this) 4034 4035 def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4036 if self._match(TokenType.COLON): 4037 return self.expression(exp.Slice, this=this, expression=self._parse_conjunction()) 4038 return this 4039 4040 def _parse_case(self) -> t.Optional[exp.Expression]: 4041 ifs = [] 4042 default = None 4043 4044 comments = self._prev_comments 4045 expression = self._parse_conjunction() 4046 4047 while self._match(TokenType.WHEN): 4048 this = self._parse_conjunction() 4049 self._match(TokenType.THEN) 4050 then = self._parse_conjunction() 4051 ifs.append(self.expression(exp.If, this=this, true=then)) 4052 4053 if self._match(TokenType.ELSE): 4054 default = self._parse_conjunction() 4055 4056 if not self._match(TokenType.END): 4057 self.raise_error("Expected END after CASE", self._prev) 4058 4059 return self._parse_window( 4060 self.expression(exp.Case, comments=comments, this=expression, ifs=ifs, default=default) 4061 ) 4062 4063 def _parse_if(self) -> t.Optional[exp.Expression]: 4064 if self._match(TokenType.L_PAREN): 4065 args = self._parse_csv(self._parse_conjunction) 4066 this = self.validate_expression(exp.If.from_arg_list(args), args) 4067 self._match_r_paren() 4068 else: 4069 index = self._index - 1 4070 condition = self._parse_conjunction() 4071 4072 if not condition: 4073 self._retreat(index) 4074 return None 4075 4076 self._match(TokenType.THEN) 4077 true = self._parse_conjunction() 4078 false = self._parse_conjunction() if self._match(TokenType.ELSE) else None 4079 self._match(TokenType.END) 4080 this = self.expression(exp.If, this=condition, true=true, false=false) 4081 4082 return self._parse_window(this) 4083 4084 def _parse_next_value_for(self) -> t.Optional[exp.Expression]: 4085 if not self._match_text_seq("VALUE", "FOR"): 
4086 self._retreat(self._index - 1) 4087 return None 4088 4089 return self.expression( 4090 exp.NextValueFor, 4091 this=self._parse_column(), 4092 order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order), 4093 ) 4094 4095 def _parse_extract(self) -> exp.Extract: 4096 this = self._parse_function() or self._parse_var() or self._parse_type() 4097 4098 if self._match(TokenType.FROM): 4099 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 4100 4101 if not self._match(TokenType.COMMA): 4102 self.raise_error("Expected FROM or comma after EXTRACT", self._prev) 4103 4104 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 4105 4106 def _parse_any_value(self) -> exp.AnyValue: 4107 this = self._parse_lambda() 4108 is_max = None 4109 having = None 4110 4111 if self._match(TokenType.HAVING): 4112 self._match_texts(("MAX", "MIN")) 4113 is_max = self._prev.text == "MAX" 4114 having = self._parse_column() 4115 4116 return self.expression(exp.AnyValue, this=this, having=having, max=is_max) 4117 4118 def _parse_cast(self, strict: bool) -> exp.Expression: 4119 this = self._parse_conjunction() 4120 4121 if not self._match(TokenType.ALIAS): 4122 if self._match(TokenType.COMMA): 4123 return self.expression(exp.CastToStrType, this=this, to=self._parse_string()) 4124 4125 self.raise_error("Expected AS after CAST") 4126 4127 fmt = None 4128 to = self._parse_types() 4129 4130 if not to: 4131 self.raise_error("Expected TYPE after CAST") 4132 elif isinstance(to, exp.Identifier): 4133 to = exp.DataType.build(to.name, udt=True) 4134 elif to.this == exp.DataType.Type.CHAR: 4135 if self._match(TokenType.CHARACTER_SET): 4136 to = self.expression(exp.CharacterSet, this=self._parse_var_or_string()) 4137 elif self._match(TokenType.FORMAT): 4138 fmt_string = self._parse_string() 4139 fmt = self._parse_at_time_zone(fmt_string) 4140 4141 if to.this in exp.DataType.TEMPORAL_TYPES: 4142 this = self.expression( 4143 
exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime, 4144 this=this, 4145 format=exp.Literal.string( 4146 format_time( 4147 fmt_string.this if fmt_string else "", 4148 self.FORMAT_MAPPING or self.TIME_MAPPING, 4149 self.FORMAT_TRIE or self.TIME_TRIE, 4150 ) 4151 ), 4152 ) 4153 4154 if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime): 4155 this.set("zone", fmt.args["zone"]) 4156 4157 return this 4158 4159 return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, format=fmt) 4160 4161 def _parse_concat(self) -> t.Optional[exp.Expression]: 4162 args = self._parse_csv(self._parse_conjunction) 4163 if self.CONCAT_NULL_OUTPUTS_STRING: 4164 args = self._ensure_string_if_null(args) 4165 4166 # Some dialects (e.g. Trino) don't allow a single-argument CONCAT call, so when 4167 # we find such a call we replace it with its argument. 4168 if len(args) == 1: 4169 return args[0] 4170 4171 return self.expression( 4172 exp.Concat if self.STRICT_STRING_CONCAT else exp.SafeConcat, expressions=args 4173 ) 4174 4175 def _parse_concat_ws(self) -> t.Optional[exp.Expression]: 4176 args = self._parse_csv(self._parse_conjunction) 4177 if len(args) < 2: 4178 return self.expression(exp.ConcatWs, expressions=args) 4179 delim, *values = args 4180 if self.CONCAT_NULL_OUTPUTS_STRING: 4181 values = self._ensure_string_if_null(values) 4182 4183 return self.expression(exp.ConcatWs, expressions=[delim] + values) 4184 4185 def _parse_string_agg(self) -> exp.Expression: 4186 if self._match(TokenType.DISTINCT): 4187 args: t.List[t.Optional[exp.Expression]] = [ 4188 self.expression(exp.Distinct, expressions=[self._parse_conjunction()]) 4189 ] 4190 if self._match(TokenType.COMMA): 4191 args.extend(self._parse_csv(self._parse_conjunction)) 4192 else: 4193 args = self._parse_csv(self._parse_conjunction) # type: ignore 4194 4195 index = self._index 4196 if not self._match(TokenType.R_PAREN) and args: 4197 # postgres: STRING_AGG([DISTINCT] expression, 
separator [ORDER BY expression1 {ASC | DESC} [, ...]]) 4198 # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n]) 4199 args[-1] = self._parse_limit(this=self._parse_order(this=args[-1])) 4200 return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1)) 4201 4202 # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]). 4203 # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that 4204 # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them. 4205 if not self._match_text_seq("WITHIN", "GROUP"): 4206 self._retreat(index) 4207 return self.validate_expression(exp.GroupConcat.from_arg_list(args), args) 4208 4209 self._match_l_paren() # The corresponding match_r_paren will be called in parse_function (caller) 4210 order = self._parse_order(this=seq_get(args, 0)) 4211 return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1)) 4212 4213 def _parse_convert(self, strict: bool) -> t.Optional[exp.Expression]: 4214 this = self._parse_bitwise() 4215 4216 if self._match(TokenType.USING): 4217 to: t.Optional[exp.Expression] = self.expression( 4218 exp.CharacterSet, this=self._parse_var() 4219 ) 4220 elif self._match(TokenType.COMMA): 4221 to = self._parse_types() 4222 else: 4223 to = None 4224 4225 return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to) 4226 4227 def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]: 4228 """ 4229 There are generally two variants of the DECODE function: 4230 4231 - DECODE(bin, charset) 4232 - DECODE(expression, search, result [, search, result] ... [, default]) 4233 4234 The second variant will always be parsed into a CASE expression. Note that NULL 4235 needs special treatment, since we need to explicitly check for it with `IS NULL`, 4236 instead of relying on pattern matching. 
4237 """ 4238 args = self._parse_csv(self._parse_conjunction) 4239 4240 if len(args) < 3: 4241 return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1)) 4242 4243 expression, *expressions = args 4244 if not expression: 4245 return None 4246 4247 ifs = [] 4248 for search, result in zip(expressions[::2], expressions[1::2]): 4249 if not search or not result: 4250 return None 4251 4252 if isinstance(search, exp.Literal): 4253 ifs.append( 4254 exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result) 4255 ) 4256 elif isinstance(search, exp.Null): 4257 ifs.append( 4258 exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result) 4259 ) 4260 else: 4261 cond = exp.or_( 4262 exp.EQ(this=expression.copy(), expression=search), 4263 exp.and_( 4264 exp.Is(this=expression.copy(), expression=exp.Null()), 4265 exp.Is(this=search.copy(), expression=exp.Null()), 4266 copy=False, 4267 ), 4268 copy=False, 4269 ) 4270 ifs.append(exp.If(this=cond, true=result)) 4271 4272 return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None) 4273 4274 def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]: 4275 self._match_text_seq("KEY") 4276 key = self._parse_column() 4277 self._match_set((TokenType.COLON, TokenType.COMMA)) 4278 self._match_text_seq("VALUE") 4279 value = self._parse_bitwise() 4280 4281 if not key and not value: 4282 return None 4283 return self.expression(exp.JSONKeyValue, this=key, expression=value) 4284 4285 def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4286 if not this or not self._match_text_seq("FORMAT", "JSON"): 4287 return this 4288 4289 return self.expression(exp.FormatJson, this=this) 4290 4291 def _parse_on_handling(self, on: str, *values: str) -> t.Optional[str]: 4292 # Parses the "X ON Y" syntax, i.e. 
NULL ON NULL (Oracle, T-SQL) 4293 for value in values: 4294 if self._match_text_seq(value, "ON", on): 4295 return f"{value} ON {on}" 4296 4297 return None 4298 4299 def _parse_json_object(self) -> exp.JSONObject: 4300 star = self._parse_star() 4301 expressions = ( 4302 [star] 4303 if star 4304 else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value())) 4305 ) 4306 null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT") 4307 4308 unique_keys = None 4309 if self._match_text_seq("WITH", "UNIQUE"): 4310 unique_keys = True 4311 elif self._match_text_seq("WITHOUT", "UNIQUE"): 4312 unique_keys = False 4313 4314 self._match_text_seq("KEYS") 4315 4316 return_type = self._match_text_seq("RETURNING") and self._parse_format_json( 4317 self._parse_type() 4318 ) 4319 encoding = self._match_text_seq("ENCODING") and self._parse_var() 4320 4321 return self.expression( 4322 exp.JSONObject, 4323 expressions=expressions, 4324 null_handling=null_handling, 4325 unique_keys=unique_keys, 4326 return_type=return_type, 4327 encoding=encoding, 4328 ) 4329 4330 def _parse_logarithm(self) -> exp.Func: 4331 # Default argument order is base, expression 4332 args = self._parse_csv(self._parse_range) 4333 4334 if len(args) > 1: 4335 if not self.LOG_BASE_FIRST: 4336 args.reverse() 4337 return exp.Log.from_arg_list(args) 4338 4339 return self.expression( 4340 exp.Ln if self.LOG_DEFAULTS_TO_LN else exp.Log, this=seq_get(args, 0) 4341 ) 4342 4343 def _parse_match_against(self) -> exp.MatchAgainst: 4344 expressions = self._parse_csv(self._parse_column) 4345 4346 self._match_text_seq(")", "AGAINST", "(") 4347 4348 this = self._parse_string() 4349 4350 if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"): 4351 modifier = "IN NATURAL LANGUAGE MODE" 4352 if self._match_text_seq("WITH", "QUERY", "EXPANSION"): 4353 modifier = f"{modifier} WITH QUERY EXPANSION" 4354 elif self._match_text_seq("IN", "BOOLEAN", "MODE"): 4355 modifier = "IN BOOLEAN MODE" 4356 elif 
self._match_text_seq("WITH", "QUERY", "EXPANSION"): 4357 modifier = "WITH QUERY EXPANSION" 4358 else: 4359 modifier = None 4360 4361 return self.expression( 4362 exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier 4363 ) 4364 4365 # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16 4366 def _parse_open_json(self) -> exp.OpenJSON: 4367 this = self._parse_bitwise() 4368 path = self._match(TokenType.COMMA) and self._parse_string() 4369 4370 def _parse_open_json_column_def() -> exp.OpenJSONColumnDef: 4371 this = self._parse_field(any_token=True) 4372 kind = self._parse_types() 4373 path = self._parse_string() 4374 as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON) 4375 4376 return self.expression( 4377 exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json 4378 ) 4379 4380 expressions = None 4381 if self._match_pair(TokenType.R_PAREN, TokenType.WITH): 4382 self._match_l_paren() 4383 expressions = self._parse_csv(_parse_open_json_column_def) 4384 4385 return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions) 4386 4387 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 4388 args = self._parse_csv(self._parse_bitwise) 4389 4390 if self._match(TokenType.IN): 4391 return self.expression( 4392 exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0) 4393 ) 4394 4395 if haystack_first: 4396 haystack = seq_get(args, 0) 4397 needle = seq_get(args, 1) 4398 else: 4399 needle = seq_get(args, 0) 4400 haystack = seq_get(args, 1) 4401 4402 return self.expression( 4403 exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2) 4404 ) 4405 4406 def _parse_join_hint(self, func_name: str) -> exp.JoinHint: 4407 args = self._parse_csv(self._parse_table) 4408 return exp.JoinHint(this=func_name.upper(), expressions=args) 4409 4410 def _parse_substring(self) -> exp.Substring: 4411 # Postgres supports the form: 
substring(string [from int] [for int]) 4412 # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6 4413 4414 args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise)) 4415 4416 if self._match(TokenType.FROM): 4417 args.append(self._parse_bitwise()) 4418 if self._match(TokenType.FOR): 4419 args.append(self._parse_bitwise()) 4420 4421 return self.validate_expression(exp.Substring.from_arg_list(args), args) 4422 4423 def _parse_trim(self) -> exp.Trim: 4424 # https://www.w3resource.com/sql/character-functions/trim.php 4425 # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html 4426 4427 position = None 4428 collation = None 4429 expression = None 4430 4431 if self._match_texts(self.TRIM_TYPES): 4432 position = self._prev.text.upper() 4433 4434 this = self._parse_bitwise() 4435 if self._match_set((TokenType.FROM, TokenType.COMMA)): 4436 invert_order = self._prev.token_type == TokenType.FROM or self.TRIM_PATTERN_FIRST 4437 expression = self._parse_bitwise() 4438 4439 if invert_order: 4440 this, expression = expression, this 4441 4442 if self._match(TokenType.COLLATE): 4443 collation = self._parse_bitwise() 4444 4445 return self.expression( 4446 exp.Trim, this=this, position=position, expression=expression, collation=collation 4447 ) 4448 4449 def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]: 4450 return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window) 4451 4452 def _parse_named_window(self) -> t.Optional[exp.Expression]: 4453 return self._parse_window(self._parse_id_var(), alias=True) 4454 4455 def _parse_respect_or_ignore_nulls( 4456 self, this: t.Optional[exp.Expression] 4457 ) -> t.Optional[exp.Expression]: 4458 if self._match_text_seq("IGNORE", "NULLS"): 4459 return self.expression(exp.IgnoreNulls, this=this) 4460 if self._match_text_seq("RESPECT", "NULLS"): 4461 return self.expression(exp.RespectNulls, this=this) 4462 return this 4463 4464 def _parse_window( 4465 
self, this: t.Optional[exp.Expression], alias: bool = False 4466 ) -> t.Optional[exp.Expression]: 4467 if self._match_pair(TokenType.FILTER, TokenType.L_PAREN): 4468 self._match(TokenType.WHERE) 4469 this = self.expression( 4470 exp.Filter, this=this, expression=self._parse_where(skip_where_token=True) 4471 ) 4472 self._match_r_paren() 4473 4474 # T-SQL allows the OVER (...) syntax after WITHIN GROUP. 4475 # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16 4476 if self._match_text_seq("WITHIN", "GROUP"): 4477 order = self._parse_wrapped(self._parse_order) 4478 this = self.expression(exp.WithinGroup, this=this, expression=order) 4479 4480 # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER 4481 # Some dialects choose to implement and some do not. 4482 # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html 4483 4484 # There is some code above in _parse_lambda that handles 4485 # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ... 4486 4487 # The below changes handle 4488 # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ... 4489 4490 # Oracle allows both formats 4491 # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html) 4492 # and Snowflake chose to do the same for familiarity 4493 # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes 4494 this = self._parse_respect_or_ignore_nulls(this) 4495 4496 # bigquery select from window x AS (partition by ...) 
4497 if alias: 4498 over = None 4499 self._match(TokenType.ALIAS) 4500 elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS): 4501 return this 4502 else: 4503 over = self._prev.text.upper() 4504 4505 if not self._match(TokenType.L_PAREN): 4506 return self.expression( 4507 exp.Window, this=this, alias=self._parse_id_var(False), over=over 4508 ) 4509 4510 window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS) 4511 4512 first = self._match(TokenType.FIRST) 4513 if self._match_text_seq("LAST"): 4514 first = False 4515 4516 partition, order = self._parse_partition_and_order() 4517 kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text 4518 4519 if kind: 4520 self._match(TokenType.BETWEEN) 4521 start = self._parse_window_spec() 4522 self._match(TokenType.AND) 4523 end = self._parse_window_spec() 4524 4525 spec = self.expression( 4526 exp.WindowSpec, 4527 kind=kind, 4528 start=start["value"], 4529 start_side=start["side"], 4530 end=end["value"], 4531 end_side=end["side"], 4532 ) 4533 else: 4534 spec = None 4535 4536 self._match_r_paren() 4537 4538 window = self.expression( 4539 exp.Window, 4540 this=this, 4541 partition_by=partition, 4542 order=order, 4543 spec=spec, 4544 alias=window_alias, 4545 over=over, 4546 first=first, 4547 ) 4548 4549 # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...) 
4550 if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False): 4551 return self._parse_window(window, alias=alias) 4552 4553 return window 4554 4555 def _parse_partition_and_order( 4556 self, 4557 ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]: 4558 return self._parse_partition_by(), self._parse_order() 4559 4560 def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]: 4561 self._match(TokenType.BETWEEN) 4562 4563 return { 4564 "value": ( 4565 (self._match_text_seq("UNBOUNDED") and "UNBOUNDED") 4566 or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW") 4567 or self._parse_bitwise() 4568 ), 4569 "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text, 4570 } 4571 4572 def _parse_alias( 4573 self, this: t.Optional[exp.Expression], explicit: bool = False 4574 ) -> t.Optional[exp.Expression]: 4575 any_token = self._match(TokenType.ALIAS) 4576 4577 if explicit and not any_token: 4578 return this 4579 4580 if self._match(TokenType.L_PAREN): 4581 aliases = self.expression( 4582 exp.Aliases, 4583 this=this, 4584 expressions=self._parse_csv(lambda: self._parse_id_var(any_token)), 4585 ) 4586 self._match_r_paren(aliases) 4587 return aliases 4588 4589 alias = self._parse_id_var(any_token) 4590 4591 if alias: 4592 return self.expression(exp.Alias, this=this, alias=alias) 4593 4594 return this 4595 4596 def _parse_id_var( 4597 self, 4598 any_token: bool = True, 4599 tokens: t.Optional[t.Collection[TokenType]] = None, 4600 ) -> t.Optional[exp.Expression]: 4601 identifier = self._parse_identifier() 4602 4603 if identifier: 4604 return identifier 4605 4606 if (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS): 4607 quoted = self._prev.token_type == TokenType.STRING 4608 return exp.Identifier(this=self._prev.text, quoted=quoted) 4609 4610 return None 4611 4612 def _parse_string(self) -> t.Optional[exp.Expression]: 4613 if self._match(TokenType.STRING): 4614 return 
# NOTE(review): this chunk was extracted with mangled line breaks; formatting below is
# reconstructed (the original file's fused line numbers were extraction artifacts and
# are dropped). The first two statements are the tail of a method whose `def` line —
# apparently `_parse_string` — precedes this chunk; they are kept verbatim, not guessed.
            self.PRIMARY_PARSERS[TokenType.STRING](self, self._prev)
        return self._parse_placeholder()

    def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]:
        """Consume a STRING token and return its text as a quoted Identifier."""
        return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True)

    def _parse_number(self) -> t.Optional[exp.Expression]:
        """Parse a NUMBER token via its primary parser, else fall back to a placeholder."""
        if self._match(TokenType.NUMBER):
            return self.PRIMARY_PARSERS[TokenType.NUMBER](self, self._prev)
        return self._parse_placeholder()

    def _parse_identifier(self) -> t.Optional[exp.Expression]:
        """Parse an IDENTIFIER token into a quoted Identifier, else a placeholder."""
        if self._match(TokenType.IDENTIFIER):
            return self.expression(exp.Identifier, this=self._prev.text, quoted=True)
        return self._parse_placeholder()

    def _parse_var(
        self, any_token: bool = False, tokens: t.Optional[t.Collection[TokenType]] = None
    ) -> t.Optional[exp.Expression]:
        """
        Parse a variable name into an exp.Var.

        Args:
            any_token: If True, accept any non-reserved token (via _advance_any).
            tokens: Extra token types to accept in addition to VAR.
        """
        if (
            (any_token and self._advance_any())
            or self._match(TokenType.VAR)
            or (self._match_set(tokens) if tokens else False)
        ):
            return self.expression(exp.Var, this=self._prev.text)
        return self._parse_placeholder()

    def _advance_any(self) -> t.Optional[Token]:
        """Advance past the current token unless it's a reserved keyword; return it, else None."""
        if self._curr and self._curr.token_type not in self.RESERVED_KEYWORDS:
            self._advance()
            return self._prev
        return None

    def _parse_var_or_string(self) -> t.Optional[exp.Expression]:
        """Parse a variable, falling back to a string literal."""
        return self._parse_var() or self._parse_string()

    def _parse_null(self) -> t.Optional[exp.Expression]:
        """Parse a NULL-like token via the NULL primary parser, else a placeholder."""
        if self._match_set(self.NULL_TOKENS):
            return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev)
        return self._parse_placeholder()

    def _parse_boolean(self) -> t.Optional[exp.Expression]:
        """Parse TRUE/FALSE via their primary parsers, else a placeholder."""
        if self._match(TokenType.TRUE):
            return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev)
        if self._match(TokenType.FALSE):
            return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev)
        return self._parse_placeholder()

    def _parse_star(self) -> t.Optional[exp.Expression]:
        """Parse `*` via the STAR primary parser, else a placeholder."""
        if self._match(TokenType.STAR):
            return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev)
        return self._parse_placeholder()

    def _parse_parameter(self) -> exp.Parameter:
        """Parse a parameter reference, optionally brace-wrapped (e.g. `{name}`)."""
        wrapped = self._match(TokenType.L_BRACE)
        this = self._parse_var() or self._parse_identifier() or self._parse_primary()
        self._match(TokenType.R_BRACE)
        return self.expression(exp.Parameter, this=this, wrapped=wrapped)

    def _parse_placeholder(self) -> t.Optional[exp.Expression]:
        """Try the registered placeholder parsers; rewind one token if none produce a node."""
        if self._match_set(self.PLACEHOLDER_PARSERS):
            placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self)
            if placeholder:
                return placeholder
            # The parser consumed the token but produced nothing — undo the advance.
            self._advance(-1)
        return None

    def _parse_except(self) -> t.Optional[t.List[exp.Expression]]:
        """Parse an EXCEPT column list: parenthesized CSV or a single column."""
        if not self._match(TokenType.EXCEPT):
            return None
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_column)

        except_column = self._parse_column()
        return [except_column] if except_column else None

    def _parse_replace(self) -> t.Optional[t.List[exp.Expression]]:
        """Parse a REPLACE expression list: parenthesized CSV or a single expression."""
        if not self._match(TokenType.REPLACE):
            return None
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_expression)

        replace_expression = self._parse_expression()
        return [replace_expression] if replace_expression else None

    def _parse_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA
    ) -> t.List[exp.Expression]:
        """Parse a `sep`-separated list using `parse_method`, dropping None results."""
        parse_result = parse_method()
        items = [parse_result] if parse_result is not None else []

        while self._match(sep):
            # Attach any comments collected at the separator to the previous item.
            self._add_comments(parse_result)
            parse_result = parse_method()
            if parse_result is not None:
                items.append(parse_result)

        return items

    def _parse_tokens(
        self, parse_method: t.Callable, expressions: t.Dict
    ) -> t.Optional[exp.Expression]:
        """Left-fold a binary operator chain using a token-type -> expression-class map."""
        this = parse_method()

        while self._match_set(expressions):
            this = self.expression(
                expressions[self._prev.token_type],
                this=this,
                comments=self._prev_comments,
                expression=parse_method(),
            )

        return this

    def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]:
        """Parse a parenthesized CSV of identifiers/variables."""
        return self._parse_wrapped_csv(self._parse_id_var, optional=optional)

    def _parse_wrapped_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False
    ) -> t.List[exp.Expression]:
        """Parse a CSV list wrapped in parentheses (parens skippable if `optional`)."""
        return self._parse_wrapped(
            lambda: self._parse_csv(parse_method, sep=sep), optional=optional
        )

    def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any:
        """Run `parse_method` inside `(...)`; missing parens error unless `optional`."""
        wrapped = self._match(TokenType.L_PAREN)
        if not wrapped and not optional:
            self.raise_error("Expecting (")
        parse_result = parse_method()
        if wrapped:
            self._match_r_paren()
        return parse_result

    def _parse_expressions(self) -> t.List[exp.Expression]:
        """Parse a comma-separated list of expressions."""
        return self._parse_csv(self._parse_expression)

    def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]:
        """Parse a SELECT or, failing that, a set-operation over a plain expression."""
        return self._parse_select() or self._parse_set_operations(
            self._parse_expression() if alias else self._parse_conjunction()
        )

    def _parse_ddl_select(self) -> t.Optional[exp.Expression]:
        """Parse the SELECT body of a DDL statement (e.g. CREATE TABLE ... AS SELECT)."""
        return self._parse_query_modifiers(
            self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False))
        )

    def _parse_transaction(self) -> exp.Transaction | exp.Command:
        """Parse a transaction start with an optional comma-separated list of modes."""
        this = None
        if self._match_texts(self.TRANSACTION_KIND):
            this = self._prev.text

        self._match_texts({"TRANSACTION", "WORK"})

        modes = []
        while True:
            mode = []
            # A single mode may span several VAR tokens (joined with spaces below).
            while self._match(TokenType.VAR):
                mode.append(self._prev.text)

            if mode:
                modes.append(" ".join(mode))
            if not self._match(TokenType.COMMA):
                break

        return self.expression(exp.Transaction, this=this, modes=modes)

    def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback:
        """
        Parse COMMIT or ROLLBACK (the leading keyword was already consumed by the caller).

        Note: `savepoint` is only attached to Rollback, and `chain` only to Commit.
        """
        chain = None
        savepoint = None
        is_rollback = self._prev.token_type == TokenType.ROLLBACK

        self._match_texts({"TRANSACTION", "WORK"})

        if self._match_text_seq("TO"):
            self._match_text_seq("SAVEPOINT")
            savepoint = self._parse_id_var()

        if self._match(TokenType.AND):
            # AND [NO] CHAIN — chain is False when NO is present.
            chain = not self._match_text_seq("NO")
            self._match_text_seq("CHAIN")

        if is_rollback:
            return self.expression(exp.Rollback, savepoint=savepoint)

        return self.expression(exp.Commit, chain=chain)

    def _parse_add_column(self) -> t.Optional[exp.Expression]:
        """Parse ADD [COLUMN] [IF NOT EXISTS] <field def> [FIRST | AFTER <col>]."""
        if not self._match_text_seq("ADD"):
            return None

        self._match(TokenType.COLUMN)
        exists_column = self._parse_exists(not_=True)
        expression = self._parse_field_def()

        if expression:
            expression.set("exists", exists_column)

        # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns
        if self._match_texts(("FIRST", "AFTER")):
            position = self._prev.text
            column_position = self.expression(
                exp.ColumnPosition, this=self._parse_column(), position=position
            )
            expression.set("position", column_position)

        return expression

    def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]:
        """Parse DROP inside ALTER TABLE, defaulting the drop kind to COLUMN."""
        drop = self._match(TokenType.DROP) and self._parse_drop()
        if drop and not isinstance(drop, exp.Command):
            drop.set("kind", drop.args.get("kind", "COLUMN"))
        return drop

    # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html
    def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition:
        """Parse a DROP PARTITION clause (comma-separated partition specs)."""
        return self.expression(
            exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists
        )

    def _parse_add_constraint(self) -> exp.AddConstraint:
        """Parse ADD CONSTRAINT / CHECK / FOREIGN KEY / PRIMARY KEY clauses."""
        this = None
        kind = self._prev.token_type

        if kind == TokenType.CONSTRAINT:
            this = self._parse_id_var()

            if self._match_text_seq("CHECK"):
                expression = self._parse_wrapped(self._parse_conjunction)
                enforced = self._match_text_seq("ENFORCED")

                return self.expression(
                    exp.AddConstraint, this=this, expression=expression, enforced=enforced
                )

        if kind == TokenType.FOREIGN_KEY or self._match(TokenType.FOREIGN_KEY):
            expression = self._parse_foreign_key()
        elif kind == TokenType.PRIMARY_KEY or self._match(TokenType.PRIMARY_KEY):
            expression = self._parse_primary_key()
        else:
            expression = None

        return self.expression(exp.AddConstraint, this=this, expression=expression)

    def _parse_alter_table_add(self) -> t.List[exp.Expression]:
        """Parse the ADD action(s) of ALTER TABLE: constraints or column definitions."""
        index = self._index - 1

        if self._match_set(self.ADD_CONSTRAINT_TOKENS):
            return self._parse_csv(self._parse_add_constraint)

        self._retreat(index)
        if not self.ALTER_TABLE_ADD_COLUMN_KEYWORD and self._match_text_seq("ADD"):
            return self._parse_csv(self._parse_field_def)

        return self._parse_csv(self._parse_add_column)

    def _parse_alter_table_alter(self) -> exp.AlterColumn:
        """Parse ALTER [COLUMN] <col> {DROP DEFAULT | SET DEFAULT <expr> | [SET DATA] TYPE ...}."""
        self._match(TokenType.COLUMN)
        column = self._parse_field(any_token=True)

        if self._match_pair(TokenType.DROP, TokenType.DEFAULT):
            return self.expression(exp.AlterColumn, this=column, drop=True)
        if self._match_pair(TokenType.SET, TokenType.DEFAULT):
            return self.expression(exp.AlterColumn, this=column, default=self._parse_conjunction())

        self._match_text_seq("SET", "DATA")
        return self.expression(
            exp.AlterColumn,
            this=column,
            dtype=self._match_text_seq("TYPE") and self._parse_types(),
            collate=self._match(TokenType.COLLATE) and self._parse_term(),
            using=self._match(TokenType.USING) and self._parse_conjunction(),
        )

    def _parse_alter_table_drop(self) -> t.List[exp.Expression]:
        """Parse the DROP action(s) of ALTER TABLE: partitions or columns."""
        index = self._index - 1

        partition_exists = self._parse_exists()
        if self._match(TokenType.PARTITION, advance=False):
            return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists))

        self._retreat(index)
        return self._parse_csv(self._parse_drop_column)

    def _parse_alter_table_rename(self) -> exp.RenameTable:
        """Parse RENAME TO <table>."""
        self._match_text_seq("TO")
        return self.expression(exp.RenameTable, this=self._parse_table(schema=True))

    def _parse_alter(self) -> exp.AlterTable | exp.Command:
        """Parse ALTER TABLE; fall back to a raw Command for unsupported forms."""
        start = self._prev

        if not self._match(TokenType.TABLE):
            return self._parse_as_command(start)

        exists = self._parse_exists()
        only = self._match_text_seq("ONLY")
        this = self._parse_table(schema=True)

        if self._next:
            self._advance()

        parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None
        if parser:
            actions = ensure_list(parser(self))

            # Only produce an AlterTable node if every token was consumed.
            if not self._curr:
                return self.expression(
                    exp.AlterTable,
                    this=this,
                    exists=exists,
                    actions=actions,
                    only=only,
                )

        return self._parse_as_command(start)

    def _parse_merge(self) -> exp.Merge:
        """Parse MERGE INTO <target> USING <source> ON <cond> WHEN ... THEN ... clauses."""
        self._match(TokenType.INTO)
        target = self._parse_table()

        if target and self._match(TokenType.ALIAS, advance=False):
            target.set("alias", self._parse_table_alias())

        self._match(TokenType.USING)
        using = self._parse_table()

        self._match(TokenType.ON)
        on = self._parse_conjunction()

        whens = []
        while self._match(TokenType.WHEN):
            matched = not self._match(TokenType.NOT)
            self._match_text_seq("MATCHED")
            # source: False for BY TARGET, otherwise the result of matching BY SOURCE.
            source = (
                False
                if self._match_text_seq("BY", "TARGET")
                else self._match_text_seq("BY", "SOURCE")
            )
            condition = self._parse_conjunction() if self._match(TokenType.AND) else None

            self._match(TokenType.THEN)

            if self._match(TokenType.INSERT):
                _this = self._parse_star()
                if _this:
                    then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=_this)
                else:
                    then = self.expression(
                        exp.Insert,
                        this=self._parse_value(),
                        expression=self._match(TokenType.VALUES) and self._parse_value(),
                    )
            elif self._match(TokenType.UPDATE):
                expressions = self._parse_star()
                if expressions:
                    then = self.expression(exp.Update, expressions=expressions)
                else:
                    then = self.expression(
                        exp.Update,
                        expressions=self._match(TokenType.SET)
                        and self._parse_csv(self._parse_equality),
                    )
            elif self._match(TokenType.DELETE):
                then = self.expression(exp.Var, this=self._prev.text)
            else:
                then = None

            whens.append(
                self.expression(
                    exp.When,
                    matched=matched,
                    source=source,
                    condition=condition,
                    then=then,
                )
            )

        return self.expression(
            exp.Merge,
            this=target,
            using=using,
            on=on,
            expressions=whens,
        )

    def _parse_show(self) -> t.Optional[exp.Expression]:
        """Dispatch SHOW statements via the SHOW trie, else fall back to a raw Command."""
        parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE)
        if parser:
            return parser(self)
        return self._parse_as_command(self._prev)

    def _parse_set_item_assignment(
        self, kind: t.Optional[str] = None
    ) -> t.Optional[exp.Expression]:
        """Parse a SET item of the form `<left> [= | TO] <right>`; rewind on failure."""
        index = self._index

        if kind in {"GLOBAL", "SESSION"} and self._match_text_seq("TRANSACTION"):
            return self._parse_set_transaction(global_=kind == "GLOBAL")

        left = self._parse_primary() or self._parse_id_var()
        assignment_delimiter = self._match_texts(("=", "TO"))

        if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter):
            self._retreat(index)
            return None

        right = self._parse_statement() or self._parse_id_var()
        this = self.expression(exp.EQ, this=left, expression=right)

        return self.expression(exp.SetItem, this=this, kind=kind)

    def _parse_set_transaction(self, global_: bool = False) -> exp.Expression:
        """Parse SET TRANSACTION with its comma-separated characteristics."""
        self._match_text_seq("TRANSACTION")
        characteristics = self._parse_csv(
            lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS)
        )
        return self.expression(
            exp.SetItem,
            expressions=characteristics,
            kind="TRANSACTION",
            **{"global": global_},  # type: ignore
        )

    def _parse_set_item(self) -> t.Optional[exp.Expression]:
        """Parse one SET item via the SET trie, else as a plain assignment."""
        parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE)
        return parser(self) if parser else self._parse_set_item_assignment(kind=None)

    def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command:
        """Parse SET; if any tokens remain unconsumed, rewind and fall back to a Command."""
        index = self._index
        set_ = self.expression(
            exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag
        )

        if self._curr:
            self._retreat(index)
            return self._parse_as_command(self._prev)

        return set_

    def _parse_var_from_options(self, options: t.Collection[str]) -> t.Optional[exp.Var]:
        """Match one of the given (possibly multi-word) options and return it as a Var."""
        for option in options:
            if self._match_text_seq(*option.split(" ")):
                return exp.var(option)
        return None

    def _parse_as_command(self, start: Token) -> exp.Command:
        """Consume all remaining tokens and wrap the raw SQL text in a Command node."""
        while self._curr:
            self._advance()
        text = self._find_sql(start, self._prev)
        # Split the raw text into the leading keyword and the rest.
        size = len(start.text)
        return exp.Command(this=text[:size], expression=text[size:])

    def _parse_dict_property(self, this: str) -> exp.DictProperty:
        """Parse a dictionary property: `(<kind> [(key value, ...)])`."""
        settings = []

        self._match_l_paren()
        kind = self._parse_id_var()

        if self._match(TokenType.L_PAREN):
            while True:
                key = self._parse_id_var()
                value = self._parse_primary()

                # Stop once neither a key nor a value can be parsed.
                if not key and value is None:
                    break
                settings.append(self.expression(exp.DictSubProperty, this=key, value=value))
            self._match(TokenType.R_PAREN)

        self._match_r_paren()

        return self.expression(
            exp.DictProperty,
            this=this,
            kind=kind.this if kind else None,
            settings=settings,
        )

    def _parse_dict_range(self, this: str) -> exp.DictRange:
        """Parse `(MIN x MAX y)` or `(MAX y)`; MIN defaults to 0 when omitted."""
        self._match_l_paren()
        has_min = self._match_text_seq("MIN")
        if has_min:
            # NOTE(review): `min`/`max` shadow the builtins here; kept for byte-compatibility.
            min = self._parse_var() or self._parse_primary()
            self._match_text_seq("MAX")
            max = self._parse_var() or self._parse_primary()
        else:
            max = self._parse_var() or self._parse_primary()
            min = exp.Literal.number(0)
        self._match_r_paren()
        return self.expression(exp.DictRange, this=this, min=min, max=max)

    def _parse_comprehension(self, this: exp.Expression) -> t.Optional[exp.Comprehension]:
        """Parse `<this> for <col> in <iter> [if <cond>]` comprehension syntax."""
        index = self._index
        expression = self._parse_column()
        if not self._match(TokenType.IN):
            # Rewinds one token *before* the saved index — presumably to also give
            # back a token the caller consumed; TODO(review) confirm against callers.
            self._retreat(index - 1)
            return None
        iterator = self._parse_column()
        condition = self._parse_conjunction() if self._match_text_seq("IF") else None
        return self.expression(
            exp.Comprehension,
            this=this,
            expression=expression,
            iterator=iterator,
            condition=condition,
        )

    def _find_parser(
        self, parsers: t.Dict[str, t.Callable], trie: t.Dict
    ) -> t.Optional[t.Callable]:
        """Walk the trie over upcoming token texts; return the matching parser or rewind."""
        if not self._curr:
            return None

        index = self._index
        this = []
        while True:
            # The current token might be multiple words
            curr = self._curr.text.upper()
            key = curr.split(" ")
            this.append(curr)

            self._advance()
            result, trie = in_trie(trie, key)
            if result == TrieResult.FAILED:
                break

            if result == TrieResult.EXISTS:
                subparser = parsers[" ".join(this)]
                return subparser

        self._retreat(index)
        return None

    def _match(self, token_type, advance=True, expression=None):
        """Match the current token against `token_type`; returns True or None (not False)."""
        if not self._curr:
            return None

        if self._curr.token_type == token_type:
            if advance:
                self._advance()
            self._add_comments(expression)
            return True

        return None

    def _match_set(self, types, advance=True):
        """Match the current token against a collection of token types."""
        if not self._curr:
            return None

        if self._curr.token_type in types:
            if advance:
                self._advance()
            return True

        return None

    def _match_pair(self, token_type_a, token_type_b, advance=True):
        """Match the current and next tokens as a pair; advance past both on success."""
        if not self._curr or not self._next:
            return None

        if self._curr.token_type == token_type_a and self._next.token_type == token_type_b:
            if advance:
                self._advance(2)
            return True

        return None

    def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None:
        """Require a `(` token, raising a parse error if absent."""
        if not self._match(TokenType.L_PAREN, expression=expression):
            self.raise_error("Expecting (")

    def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None:
        """Require a `)` token, raising a parse error if absent."""
        if not self._match(TokenType.R_PAREN, expression=expression):
            self.raise_error("Expecting )")

    def _match_texts(self, texts, advance=True):
        """Match the current token's upper-cased text against a collection of strings."""
        if self._curr and self._curr.text.upper() in texts:
            if advance:
                self._advance()
            return True
        return False

    def _match_text_seq(self, *texts, advance=True):
        """Match a sequence of upper-cased token texts; fully rewind on any miss."""
        index = self._index
        for text in texts:
            if self._curr and self._curr.text.upper() == text:
                self._advance()
            else:
                self._retreat(index)
                return False

        # Peek-only mode: report the match but restore the cursor.
        if not advance:
            self._retreat(index)

        return True

    @t.overload
    def _replace_columns_with_dots(self, this: exp.Expression) -> exp.Expression:
        ...

    @t.overload
    def _replace_columns_with_dots(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        ...

    def _replace_columns_with_dots(self, this):
        """Recursively rewrite Column nodes as Dot chains (table . name)."""
        if isinstance(this, exp.Dot):
            exp.replace_children(this, self._replace_columns_with_dots)
        elif isinstance(this, exp.Column):
            exp.replace_children(this, self._replace_columns_with_dots)
            table = this.args.get("table")
            this = (
                self.expression(exp.Dot, this=table, expression=this.this) if table else this.this
            )

        return this

    def _replace_lambda(
        self, node: t.Optional[exp.Expression], lambda_variables: t.Set[str]
    ) -> t.Optional[exp.Expression]:
        """Replace columns that reference lambda parameters with plain identifiers/dots."""
        if not node:
            return node

        for column in node.find_all(exp.Column):
            if column.parts[0].name in lambda_variables:
                dot_or_id = column.to_dot() if column.table else column.this
                parent = column.parent

                while isinstance(parent, exp.Dot):
                    if not isinstance(parent.parent, exp.Dot):
                        parent.replace(dot_or_id)
                        break
                    parent = parent.parent
                else:
                    # while-else: no enclosing Dot chain was replaced, so replace the
                    # column itself (or rebind the root node if the column *is* the root).
                    if column is node:
                        node = dot_or_id
                    else:
                        column.replace(dot_or_id)
        return node

    def _ensure_string_if_null(self, values: t.List[exp.Expression]) -> t.List[exp.Expression]:
        """Wrap each value in COALESCE(CAST(value AS TEXT), '') so NULLs become ''."""
        return [
            exp.func("COALESCE", exp.cast(value, "text"), exp.Literal.string(""))
            for value in values
            if value
        ]
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: Determines the amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
    def __init__(
        self,
        error_level: t.Optional[ErrorLevel] = None,
        error_message_context: int = 100,
        max_errors: int = 3,
    ):
        """
        Initialize parser configuration and reset all transient parse state.

        Args:
            error_level: The desired error level; defaults to ErrorLevel.IMMEDIATE.
            error_message_context: Number of characters of query context to show in errors.
            max_errors: Maximum number of errors to include in a raised ParseError
                (only relevant when error_level is ErrorLevel.RAISE).
        """
        self.error_level = error_level or ErrorLevel.IMMEDIATE
        self.error_message_context = error_message_context
        self.max_errors = max_errors
        # Dialect-specific tokenizer class, supplied by the concrete Parser subclass.
        self._tokenizer = self.TOKENIZER_CLASS()
        self.reset()
957 def parse( 958 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 959 ) -> t.List[t.Optional[exp.Expression]]: 960 """ 961 Parses a list of tokens and returns a list of syntax trees, one tree 962 per parsed SQL statement. 963 964 Args: 965 raw_tokens: The list of tokens. 966 sql: The original SQL string, used to produce helpful debug messages. 967 968 Returns: 969 The list of the produced syntax trees. 970 """ 971 return self._parse( 972 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 973 )
Parses a list of tokens and returns a list of syntax trees, one tree per parsed SQL statement.
Arguments:
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The list of the produced syntax trees.
975 def parse_into( 976 self, 977 expression_types: exp.IntoType, 978 raw_tokens: t.List[Token], 979 sql: t.Optional[str] = None, 980 ) -> t.List[t.Optional[exp.Expression]]: 981 """ 982 Parses a list of tokens into a given Expression type. If a collection of Expression 983 types is given instead, this method will try to parse the token list into each one 984 of them, stopping at the first for which the parsing succeeds. 985 986 Args: 987 expression_types: The expression type(s) to try and parse the token list into. 988 raw_tokens: The list of tokens. 989 sql: The original SQL string, used to produce helpful debug messages. 990 991 Returns: 992 The target Expression. 993 """ 994 errors = [] 995 for expression_type in ensure_list(expression_types): 996 parser = self.EXPRESSION_PARSERS.get(expression_type) 997 if not parser: 998 raise TypeError(f"No parser registered for {expression_type}") 999 1000 try: 1001 return self._parse(parser, raw_tokens, sql) 1002 except ParseError as e: 1003 e.errors[0]["into_expression"] = expression_type 1004 errors.append(e) 1005 1006 raise ParseError( 1007 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 1008 errors=merge_errors(errors), 1009 ) from errors[-1]
Parses a list of tokens into a given Expression type. If a collection of Expression types is given instead, this method will try to parse the token list into each one of them, stopping at the first for which the parsing succeeds.
Arguments:
- expression_types: The expression type(s) to try and parse the token list into.
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The target Expression.
1046 def check_errors(self) -> None: 1047 """Logs or raises any found errors, depending on the chosen error level setting.""" 1048 if self.error_level == ErrorLevel.WARN: 1049 for error in self.errors: 1050 logger.error(str(error)) 1051 elif self.error_level == ErrorLevel.RAISE and self.errors: 1052 raise ParseError( 1053 concat_messages(self.errors, self.max_errors), 1054 errors=merge_errors(self.errors), 1055 )
Logs or raises any found errors, depending on the chosen error level setting.
    def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
        """
        Appends an error in the list of recorded errors or raises it, depending on the chosen
        error level setting.

        Args:
            message: The error description.
            token: The token the error is anchored to; defaults to the current, then the
                previous token, then an empty string token.
        """
        token = token or self._curr or self._prev or Token.string("")
        start = token.start
        end = token.end + 1
        # Slice surrounding query text so the message can underline the offending span.
        start_context = self.sql[max(start - self.error_message_context, 0) : start]
        highlight = self.sql[start:end]
        end_context = self.sql[end : end + self.error_message_context]

        error = ParseError.new(
            f"{message}. Line {token.line}, Col: {token.col}.\n"
            f" {start_context}\033[4m{highlight}\033[0m{end_context}",  # ANSI underline around the highlight
            description=message,
            line=token.line,
            col=token.col,
            start_context=start_context,
            highlight=highlight,
            end_context=end_context,
        )

        # IMMEDIATE raises right away; other levels accumulate for check_errors().
        if self.error_level == ErrorLevel.IMMEDIATE:
            raise error

        self.errors.append(error)
Appends an error in the list of recorded errors or raises it, depending on the chosen error level setting.
1085 def expression( 1086 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 1087 ) -> E: 1088 """ 1089 Creates a new, validated Expression. 1090 1091 Args: 1092 exp_class: The expression class to instantiate. 1093 comments: An optional list of comments to attach to the expression. 1094 kwargs: The arguments to set for the expression along with their respective values. 1095 1096 Returns: 1097 The target expression. 1098 """ 1099 instance = exp_class(**kwargs) 1100 instance.add_comments(comments) if comments else self._add_comments(instance) 1101 return self.validate_expression(instance)
Creates a new, validated Expression.
Arguments:
- exp_class: The expression class to instantiate.
- comments: An optional list of comments to attach to the expression.
- kwargs: The arguments to set for the expression along with their respective values.
Returns:
The target expression.
1108 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1109 """ 1110 Validates an Expression, making sure that all its mandatory arguments are set. 1111 1112 Args: 1113 expression: The expression to validate. 1114 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1115 1116 Returns: 1117 The validated expression. 1118 """ 1119 if self.error_level != ErrorLevel.IGNORE: 1120 for error_message in expression.error_messages(args): 1121 self.raise_error(error_message) 1122 1123 return expression
Validates an Expression, making sure that all its mandatory arguments are set.
Arguments:
- expression: The expression to validate.
- args: An optional list of items that was used to instantiate the expression, if it's a Func.
Returns:
The validated expression.