sqlglot.parser
from __future__ import annotations

import logging
import typing as t
from collections import defaultdict

from sqlglot import exp
from sqlglot.errors import ErrorLevel, ParseError, concat_messages, merge_errors
from sqlglot.helper import apply_index_offset, ensure_list, seq_get
from sqlglot.time import format_time
from sqlglot.tokens import Token, Tokenizer, TokenType
from sqlglot.trie import TrieResult, in_trie, new_trie

if t.TYPE_CHECKING:
    from sqlglot._typing import E

logger = logging.getLogger("sqlglot")


def parse_var_map(args: t.List) -> exp.StarMap | exp.VarMap:
    if len(args) == 1 and args[0].is_star:
        return exp.StarMap(this=args[0])

    keys = []
    values = []
    for i in range(0, len(args), 2):
        keys.append(args[i])
        values.append(args[i + 1])

    return exp.VarMap(
        keys=exp.Array(expressions=keys),
        values=exp.Array(expressions=values),
    )


def parse_like(args: t.List) -> exp.Escape | exp.Like:
    like = exp.Like(this=seq_get(args, 1), expression=seq_get(args, 0))
    return exp.Escape(this=like, expression=seq_get(args, 2)) if len(args) > 2 else like


def binary_range_parser(
    expr_type: t.Type[exp.Expression],
) -> t.Callable[[Parser, t.Optional[exp.Expression]], t.Optional[exp.Expression]]:
    return lambda self, this: self._parse_escape(
        self.expression(expr_type, this=this, expression=self._parse_bitwise())
    )


class _Parser(type):
    def __new__(cls, clsname, bases, attrs):
        klass = super().__new__(cls, clsname, bases, attrs)

        klass.SHOW_TRIE = new_trie(key.split(" ") for key in klass.SHOW_PARSERS)
        klass.SET_TRIE = new_trie(key.split(" ") for key in klass.SET_PARSERS)

        return klass
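# A minimal sketch of the helpers above (illustrative, not part of the
# original source): parse_var_map pairs a flat argument list into parallel
# key/value arrays, e.g.
#
#   >>> parse_var_map([exp.Literal.string("a"), exp.Literal.number(1)])
#   # -> exp.VarMap(keys=Array(['a']), values=Array([1]))
#
# while parse_like swaps its first two arguments, so LIKE(pattern, this)
# becomes exp.Like(this=..., expression=pattern), wrapped in exp.Escape
# only when a third (escape) argument is present.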
class Parser(metaclass=_Parser):
    """
    Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.

    Args:
        error_level: The desired error level.
            Default: ErrorLevel.IMMEDIATE
        error_message_context: Determines the amount of context to capture from a
            query string when displaying the error message (in number of characters).
            Default: 100
        max_errors: Maximum number of error messages to include in a raised ParseError.
            This is only relevant if error_level is ErrorLevel.RAISE.
            Default: 3
    """

    FUNCTIONS: t.Dict[str, t.Callable] = {
        **{name: f.from_arg_list for f in exp.ALL_FUNCTIONS for name in f.sql_names()},
        "DATE_TO_DATE_STR": lambda args: exp.Cast(
            this=seq_get(args, 0),
            to=exp.DataType(this=exp.DataType.Type.TEXT),
        ),
        "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)),
        "LIKE": parse_like,
        "TIME_TO_TIME_STR": lambda args: exp.Cast(
            this=seq_get(args, 0),
            to=exp.DataType(this=exp.DataType.Type.TEXT),
        ),
        "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring(
            this=exp.Cast(
                this=seq_get(args, 0),
                to=exp.DataType(this=exp.DataType.Type.TEXT),
            ),
            start=exp.Literal.number(1),
            length=exp.Literal.number(10),
        ),
        "VAR_MAP": parse_var_map,
    }

    NO_PAREN_FUNCTIONS = {
        TokenType.CURRENT_DATE: exp.CurrentDate,
        TokenType.CURRENT_DATETIME: exp.CurrentDate,
        TokenType.CURRENT_TIME: exp.CurrentTime,
        TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp,
        TokenType.CURRENT_USER: exp.CurrentUser,
    }

    NESTED_TYPE_TOKENS = {
        TokenType.ARRAY,
        TokenType.MAP,
        TokenType.NULLABLE,
        TokenType.STRUCT,
    }

    ENUM_TYPE_TOKENS = {
        TokenType.ENUM,
    }

    TYPE_TOKENS = {
        TokenType.BIT,
        TokenType.BOOLEAN,
        TokenType.TINYINT,
        TokenType.UTINYINT,
        TokenType.SMALLINT,
        TokenType.USMALLINT,
        TokenType.INT,
        TokenType.UINT,
        TokenType.BIGINT,
        TokenType.UBIGINT,
        TokenType.INT128,
        TokenType.UINT128,
        TokenType.INT256,
        TokenType.UINT256,
        TokenType.FLOAT,
        TokenType.DOUBLE,
        TokenType.CHAR,
        TokenType.NCHAR,
        TokenType.VARCHAR,
        TokenType.NVARCHAR,
        TokenType.TEXT,
        TokenType.MEDIUMTEXT,
        TokenType.LONGTEXT,
        TokenType.MEDIUMBLOB,
        TokenType.LONGBLOB,
        TokenType.BINARY,
        TokenType.VARBINARY,
        TokenType.JSON,
        TokenType.JSONB,
        TokenType.INTERVAL,
        TokenType.TIME,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
        TokenType.DATETIME,
        TokenType.DATETIME64,
        TokenType.DATE,
        TokenType.INT4RANGE,
        TokenType.INT4MULTIRANGE,
        TokenType.INT8RANGE,
        TokenType.INT8MULTIRANGE,
        TokenType.NUMRANGE,
        TokenType.NUMMULTIRANGE,
        TokenType.TSRANGE,
        TokenType.TSMULTIRANGE,
        TokenType.TSTZRANGE,
        TokenType.TSTZMULTIRANGE,
        TokenType.DATERANGE,
        TokenType.DATEMULTIRANGE,
        TokenType.DECIMAL,
        TokenType.BIGDECIMAL,
        TokenType.UUID,
        TokenType.GEOGRAPHY,
        TokenType.GEOMETRY,
        TokenType.HLLSKETCH,
        TokenType.HSTORE,
        TokenType.PSEUDO_TYPE,
        TokenType.SUPER,
        TokenType.SERIAL,
        TokenType.SMALLSERIAL,
        TokenType.BIGSERIAL,
        TokenType.XML,
        TokenType.UNIQUEIDENTIFIER,
        TokenType.USERDEFINED,
        TokenType.MONEY,
        TokenType.SMALLMONEY,
        TokenType.ROWVERSION,
        TokenType.IMAGE,
        TokenType.VARIANT,
        TokenType.OBJECT,
        TokenType.INET,
        TokenType.ENUM,
        *NESTED_TYPE_TOKENS,
    }

    SUBQUERY_PREDICATES = {
        TokenType.ANY: exp.Any,
        TokenType.ALL: exp.All,
        TokenType.EXISTS: exp.Exists,
        TokenType.SOME: exp.Any,
    }

    RESERVED_KEYWORDS = {
        *Tokenizer.SINGLE_TOKENS.values(),
        TokenType.SELECT,
    }

    DB_CREATABLES = {
        TokenType.DATABASE,
        TokenType.SCHEMA,
        TokenType.TABLE,
        TokenType.VIEW,
        TokenType.DICTIONARY,
    }

    CREATABLES = {
        TokenType.COLUMN,
        TokenType.FUNCTION,
        TokenType.INDEX,
        TokenType.PROCEDURE,
        *DB_CREATABLES,
    }
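    # Illustrative sketch (not in the original source) of how FUNCTIONS is
    # consumed: a recognized function call is looked up by its upper-cased SQL
    # name and the registered builder is applied to the already-parsed argument
    # list, so e.g. TS_OR_DS_TO_DATE_STR(x) is built on the fly as
    # SUBSTRING(CAST(x AS TEXT), 1, 10) via the lambda registered above.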
    # Tokens that can represent identifiers
    ID_VAR_TOKENS = {
        TokenType.VAR,
        TokenType.ANTI,
        TokenType.APPLY,
        TokenType.ASC,
        TokenType.AUTO_INCREMENT,
        TokenType.BEGIN,
        TokenType.CACHE,
        TokenType.CASE,
        TokenType.COLLATE,
        TokenType.COMMAND,
        TokenType.COMMENT,
        TokenType.COMMIT,
        TokenType.CONSTRAINT,
        TokenType.DEFAULT,
        TokenType.DELETE,
        TokenType.DESC,
        TokenType.DESCRIBE,
        TokenType.DICTIONARY,
        TokenType.DIV,
        TokenType.END,
        TokenType.EXECUTE,
        TokenType.ESCAPE,
        TokenType.FALSE,
        TokenType.FIRST,
        TokenType.FILTER,
        TokenType.FORMAT,
        TokenType.FULL,
        TokenType.IF,
        TokenType.IS,
        TokenType.ISNULL,
        TokenType.INTERVAL,
        TokenType.KEEP,
        TokenType.LEFT,
        TokenType.LOAD,
        TokenType.MERGE,
        TokenType.NATURAL,
        TokenType.NEXT,
        TokenType.OFFSET,
        TokenType.ORDINALITY,
        TokenType.OVERWRITE,
        TokenType.PARTITION,
        TokenType.PERCENT,
        TokenType.PIVOT,
        TokenType.PRAGMA,
        TokenType.RANGE,
        TokenType.REFERENCES,
        TokenType.RIGHT,
        TokenType.ROW,
        TokenType.ROWS,
        TokenType.SEMI,
        TokenType.SET,
        TokenType.SETTINGS,
        TokenType.SHOW,
        TokenType.TEMPORARY,
        TokenType.TOP,
        TokenType.TRUE,
        TokenType.UNIQUE,
        TokenType.UNPIVOT,
        TokenType.UPDATE,
        TokenType.VOLATILE,
        TokenType.WINDOW,
        *CREATABLES,
        *SUBQUERY_PREDICATES,
        *TYPE_TOKENS,
        *NO_PAREN_FUNCTIONS,
    }

    INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END}

    TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - {
        TokenType.APPLY,
        TokenType.ASOF,
        TokenType.FULL,
        TokenType.LEFT,
        TokenType.LOCK,
        TokenType.NATURAL,
        TokenType.OFFSET,
        TokenType.RIGHT,
        TokenType.WINDOW,
    }

    COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS}

    UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET}

    TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"}

    FUNC_TOKENS = {
        TokenType.COMMAND,
        TokenType.CURRENT_DATE,
        TokenType.CURRENT_DATETIME,
        TokenType.CURRENT_TIMESTAMP,
        TokenType.CURRENT_TIME,
        TokenType.CURRENT_USER,
        TokenType.FILTER,
        TokenType.FIRST,
        TokenType.FORMAT,
        TokenType.GLOB,
        TokenType.IDENTIFIER,
        TokenType.INDEX,
        TokenType.ISNULL,
        TokenType.ILIKE,
        TokenType.LIKE,
        TokenType.MERGE,
        TokenType.OFFSET,
        TokenType.PRIMARY_KEY,
        TokenType.RANGE,
        TokenType.REPLACE,
        TokenType.RLIKE,
        TokenType.ROW,
        TokenType.UNNEST,
        TokenType.VAR,
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.DATE,
        TokenType.DATETIME,
        TokenType.TABLE,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.WINDOW,
        TokenType.XOR,
        *TYPE_TOKENS,
        *SUBQUERY_PREDICATES,
    }

    CONJUNCTION = {
        TokenType.AND: exp.And,
        TokenType.OR: exp.Or,
    }

    EQUALITY = {
        TokenType.EQ: exp.EQ,
        TokenType.NEQ: exp.NEQ,
        TokenType.NULLSAFE_EQ: exp.NullSafeEQ,
    }

    COMPARISON = {
        TokenType.GT: exp.GT,
        TokenType.GTE: exp.GTE,
        TokenType.LT: exp.LT,
        TokenType.LTE: exp.LTE,
    }

    BITWISE = {
        TokenType.AMP: exp.BitwiseAnd,
        TokenType.CARET: exp.BitwiseXor,
        TokenType.PIPE: exp.BitwiseOr,
        TokenType.DPIPE: exp.DPipe,
    }

    TERM = {
        TokenType.DASH: exp.Sub,
        TokenType.PLUS: exp.Add,
        TokenType.MOD: exp.Mod,
        TokenType.COLLATE: exp.Collate,
    }
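    # A brief orientation sketch (assuming the standard precedence-climbing
    # loop in the _parse_* methods below): CONJUNCTION, EQUALITY, COMPARISON,
    # BITWISE, TERM and FACTOR each feed one tier of binary-operator parsing,
    # so in `a + b * c` the FACTOR table (STAR -> exp.Mul) binds tighter than
    # the TERM table (PLUS -> exp.Add).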
    FACTOR = {
        TokenType.DIV: exp.IntDiv,
        TokenType.LR_ARROW: exp.Distance,
        TokenType.SLASH: exp.Div,
        TokenType.STAR: exp.Mul,
    }

    TIMESTAMPS = {
        TokenType.TIME,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
    }

    SET_OPERATIONS = {
        TokenType.UNION,
        TokenType.INTERSECT,
        TokenType.EXCEPT,
    }

    JOIN_METHODS = {
        TokenType.NATURAL,
        TokenType.ASOF,
    }

    JOIN_SIDES = {
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.FULL,
    }

    JOIN_KINDS = {
        TokenType.INNER,
        TokenType.OUTER,
        TokenType.CROSS,
        TokenType.SEMI,
        TokenType.ANTI,
    }

    JOIN_HINTS: t.Set[str] = set()

    LAMBDAS = {
        TokenType.ARROW: lambda self, expressions: self.expression(
            exp.Lambda,
            this=self._replace_lambda(
                self._parse_conjunction(),
                {node.name for node in expressions},
            ),
            expressions=expressions,
        ),
        TokenType.FARROW: lambda self, expressions: self.expression(
            exp.Kwarg,
            this=exp.var(expressions[0].name),
            expression=self._parse_conjunction(),
        ),
    }

    COLUMN_OPERATORS = {
        TokenType.DOT: None,
        TokenType.DCOLON: lambda self, this, to: self.expression(
            exp.Cast if self.STRICT_CAST else exp.TryCast,
            this=this,
            to=to,
        ),
        TokenType.ARROW: lambda self, this, path: self.expression(
            exp.JSONExtract,
            this=this,
            expression=path,
        ),
        TokenType.DARROW: lambda self, this, path: self.expression(
            exp.JSONExtractScalar,
            this=this,
            expression=path,
        ),
        TokenType.HASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtract,
            this=this,
            expression=path,
        ),
        TokenType.DHASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtractScalar,
            this=this,
            expression=path,
        ),
        TokenType.PLACEHOLDER: lambda self, this, key: self.expression(
            exp.JSONBContains,
            this=this,
            expression=key,
        ),
    }

    EXPRESSION_PARSERS = {
        exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
        exp.Column: lambda self: self._parse_column(),
        exp.Condition: lambda self: self._parse_conjunction(),
        exp.DataType: lambda self: self._parse_types(),
        exp.Expression: lambda self: self._parse_statement(),
        exp.From: lambda self: self._parse_from(),
        exp.Group: lambda self: self._parse_group(),
        exp.Having: lambda self: self._parse_having(),
        exp.Identifier: lambda self: self._parse_id_var(),
        exp.Join: lambda self: self._parse_join(),
        exp.Lambda: lambda self: self._parse_lambda(),
        exp.Lateral: lambda self: self._parse_lateral(),
        exp.Limit: lambda self: self._parse_limit(),
        exp.Offset: lambda self: self._parse_offset(),
        exp.Order: lambda self: self._parse_order(),
        exp.Ordered: lambda self: self._parse_ordered(),
        exp.Properties: lambda self: self._parse_properties(),
        exp.Qualify: lambda self: self._parse_qualify(),
        exp.Returning: lambda self: self._parse_returning(),
        exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY),
        exp.Table: lambda self: self._parse_table_parts(),
        exp.TableAlias: lambda self: self._parse_table_alias(),
        exp.Where: lambda self: self._parse_where(),
        exp.Window: lambda self: self._parse_named_window(),
        exp.With: lambda self: self._parse_with(),
        "JOIN_TYPE": lambda self: self._parse_join_parts(),
    }
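    # Examples of COLUMN_OPERATORS in action (a minimal sketch; the output
    # shapes follow directly from the lambdas above):
    #
    #   col::INT       -> exp.Cast(this=col, to=INT), or exp.TryCast when
    #                     STRICT_CAST is False
    #   col -> '$.a'   -> exp.JSONExtract(this=col, expression='$.a')
    #   col ->> '$.a'  -> exp.JSONExtractScalar(this=col, expression='$.a')
    #   col #> '{a}'   -> exp.JSONBExtract(this=col, expression='{a}')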
    STATEMENT_PARSERS = {
        TokenType.ALTER: lambda self: self._parse_alter(),
        TokenType.BEGIN: lambda self: self._parse_transaction(),
        TokenType.CACHE: lambda self: self._parse_cache(),
        TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(),
        TokenType.COMMENT: lambda self: self._parse_comment(),
        TokenType.CREATE: lambda self: self._parse_create(),
        TokenType.DELETE: lambda self: self._parse_delete(),
        TokenType.DESC: lambda self: self._parse_describe(),
        TokenType.DESCRIBE: lambda self: self._parse_describe(),
        TokenType.DROP: lambda self: self._parse_drop(),
        TokenType.FROM: lambda self: exp.select("*").from_(
            t.cast(exp.From, self._parse_from(skip_from_token=True))
        ),
        TokenType.INSERT: lambda self: self._parse_insert(),
        TokenType.LOAD: lambda self: self._parse_load(),
        TokenType.MERGE: lambda self: self._parse_merge(),
        TokenType.PIVOT: lambda self: self._parse_simplified_pivot(),
        TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()),
        TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(),
        TokenType.SET: lambda self: self._parse_set(),
        TokenType.UNCACHE: lambda self: self._parse_uncache(),
        TokenType.UPDATE: lambda self: self._parse_update(),
        TokenType.USE: lambda self: self.expression(
            exp.Use,
            kind=self._match_texts(("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA"))
            and exp.var(self._prev.text),
            this=self._parse_table(schema=False),
        ),
    }

    UNARY_PARSERS = {
        TokenType.PLUS: lambda self: self._parse_unary(),  # Unary + is handled as a no-op
        TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()),
        TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()),
        TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()),
    }

    PRIMARY_PARSERS = {
        TokenType.STRING: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=True
        ),
        TokenType.NUMBER: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=False
        ),
        TokenType.STAR: lambda self, _: self.expression(
            exp.Star, **{"except": self._parse_except(), "replace": self._parse_replace()}
        ),
        TokenType.NULL: lambda self, _: self.expression(exp.Null),
        TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True),
        TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False),
        TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text),
        TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text),
        TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text),
        TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token),
        TokenType.NATIONAL_STRING: lambda self, token: self.expression(
            exp.National, this=token.text
        ),
        TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text),
        TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(),
    }

    PLACEHOLDER_PARSERS = {
        TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder),
        TokenType.PARAMETER: lambda self: self._parse_parameter(),
        TokenType.COLON: lambda self: self.expression(exp.Placeholder, this=self._prev.text)
        if self._match_set((TokenType.NUMBER, TokenType.VAR))
        else None,
    }
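    # Placeholder forms recognized above, in brief (sketch): a bare "?" token
    # produces an empty exp.Placeholder, parameter tokens are delegated to
    # _parse_parameter, and a colon yields exp.Placeholder(this=...) only when
    # it is immediately followed by a NUMBER or VAR token (":1", ":name").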
    RANGE_PARSERS = {
        TokenType.BETWEEN: lambda self, this: self._parse_between(this),
        TokenType.GLOB: binary_range_parser(exp.Glob),
        TokenType.ILIKE: binary_range_parser(exp.ILike),
        TokenType.IN: lambda self, this: self._parse_in(this),
        TokenType.IRLIKE: binary_range_parser(exp.RegexpILike),
        TokenType.IS: lambda self, this: self._parse_is(this),
        TokenType.LIKE: binary_range_parser(exp.Like),
        TokenType.OVERLAPS: binary_range_parser(exp.Overlaps),
        TokenType.RLIKE: binary_range_parser(exp.RegexpLike),
        TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo),
    }

    PROPERTY_PARSERS: t.Dict[str, t.Callable] = {
        "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty),
        "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty),
        "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(),
        "CHARACTER SET": lambda self: self._parse_character_set(),
        "CHECKSUM": lambda self: self._parse_checksum(),
        "CLUSTER BY": lambda self: self._parse_cluster(),
        "CLUSTERED": lambda self: self._parse_clustered_by(),
        "COLLATE": lambda self: self._parse_property_assignment(exp.CollateProperty),
        "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty),
        "COPY": lambda self: self._parse_copy_property(),
        "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs),
        "DEFINER": lambda self: self._parse_definer(),
        "DETERMINISTIC": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "DISTKEY": lambda self: self._parse_distkey(),
        "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty),
        "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty),
        "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty),
        "EXTERNAL": lambda self: self.expression(exp.ExternalProperty),
        "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs),
        "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "FREESPACE": lambda self: self._parse_freespace(),
        "IMMUTABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs),
        "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty),
        "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"),
        "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"),
        "LIKE": lambda self: self._parse_create_like(),
        "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty),
        "LOCK": lambda self: self._parse_locking(),
        "LOCKING": lambda self: self._parse_locking(),
        "LOG": lambda self, **kwargs: self._parse_log(**kwargs),
        "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty),
        "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs),
        "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True),
        "NO": lambda self: self._parse_no_property(),
        "ON": lambda self: self._parse_on_property(),
        "ORDER BY": lambda self: self._parse_order(skip_order_token=True),
        "PARTITION BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED_BY": lambda self: self._parse_partitioned_by(),
        "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True),
        "RANGE": lambda self: self._parse_dict_range(this="RANGE"),
        "RETURNS": lambda self: self._parse_returns(),
        "ROW": lambda self: self._parse_row(),
        "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty),
        "SET": lambda self: self.expression(exp.SetProperty, multi=False),
        "SETTINGS": lambda self: self.expression(
            exp.SettingsProperty, expressions=self._parse_csv(self._parse_set_item)
        ),
        "SORTKEY": lambda self: self._parse_sortkey(),
        "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"),
        "STABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("STABLE")
        ),
        "STORED": lambda self: self._parse_stored(),
        "TBLPROPERTIES": lambda self: self._parse_wrapped_csv(self._parse_property),
        "TEMP": lambda self: self.expression(exp.TemporaryProperty),
        "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty),
        "TO": lambda self: self._parse_to_table(),
        "TRANSIENT": lambda self: self.expression(exp.TransientProperty),
        "TTL": lambda self: self._parse_ttl(),
        "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "VOLATILE": lambda self: self._parse_volatile_property(),
        "WITH": lambda self: self._parse_with_property(),
    }

    CONSTRAINT_PARSERS = {
        "AUTOINCREMENT": lambda self: self._parse_auto_increment(),
        "AUTO_INCREMENT": lambda self: self._parse_auto_increment(),
        "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False),
        "CHARACTER SET": lambda self: self.expression(
            exp.CharacterSetColumnConstraint, this=self._parse_var_or_string()
        ),
        "CHECK": lambda self: self.expression(
            exp.CheckColumnConstraint, this=self._parse_wrapped(self._parse_conjunction)
        ),
        "COLLATE": lambda self: self.expression(
            exp.CollateColumnConstraint, this=self._parse_var()
        ),
        "COMMENT": lambda self: self.expression(
            exp.CommentColumnConstraint, this=self._parse_string()
        ),
        "COMPRESS": lambda self: self._parse_compress(),
        "DEFAULT": lambda self: self.expression(
            exp.DefaultColumnConstraint, this=self._parse_bitwise()
        ),
        "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()),
        "FOREIGN KEY": lambda self: self._parse_foreign_key(),
        "FORMAT": lambda self: self.expression(
            exp.DateFormatColumnConstraint, this=self._parse_var_or_string()
        ),
        "GENERATED": lambda self: self._parse_generated_as_identity(),
        "IDENTITY": lambda self: self._parse_auto_increment(),
        "INLINE": lambda self: self._parse_inline(),
        "LIKE": lambda self: self._parse_create_like(),
        "NOT": lambda self: self._parse_not_constraint(),
        "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True),
        "ON": lambda self: self._match(TokenType.UPDATE)
        and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function()),
        "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()),
        "PRIMARY KEY": lambda self: self._parse_primary_key(),
        "REFERENCES": lambda self: self._parse_references(match=False),
        "TITLE": lambda self: self.expression(
            exp.TitleColumnConstraint, this=self._parse_var_or_string()
        ),
        "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]),
        "UNIQUE": lambda self: self._parse_unique(),
        "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint),
    }
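    # Constraint parsing sketch (illustrative): in a column definition such as
    # `x INT NOT NULL DEFAULT 0`, "NOT" routes to _parse_not_constraint and
    # "DEFAULT" wraps the following bitwise expression, so the column ends up
    # with [NotNullColumnConstraint, DefaultColumnConstraint(this=0)]. Note
    # that a bare "NULL" is encoded as NotNullColumnConstraint(allow_null=True).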
    ALTER_PARSERS = {
        "ADD": lambda self: self._parse_alter_table_add(),
        "ALTER": lambda self: self._parse_alter_table_alter(),
        "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()),
        "DROP": lambda self: self._parse_alter_table_drop(),
        "RENAME": lambda self: self._parse_alter_table_rename(),
    }

    SCHEMA_UNNAMED_CONSTRAINTS = {"CHECK", "FOREIGN KEY", "LIKE", "PRIMARY KEY", "UNIQUE"}

    NO_PAREN_FUNCTION_PARSERS = {
        TokenType.ANY: lambda self: self.expression(exp.Any, this=self._parse_bitwise()),
        TokenType.CASE: lambda self: self._parse_case(),
        TokenType.IF: lambda self: self._parse_if(),
        TokenType.NEXT_VALUE_FOR: lambda self: self.expression(
            exp.NextValueFor,
            this=self._parse_column(),
            order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order),
        ),
    }

    FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"}

    FUNCTION_PARSERS = {
        "ANY_VALUE": lambda self: self._parse_any_value(),
        "CAST": lambda self: self._parse_cast(self.STRICT_CAST),
        "CONCAT": lambda self: self._parse_concat(),
        "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST),
        "DECODE": lambda self: self._parse_decode(),
        "EXTRACT": lambda self: self._parse_extract(),
        "JSON_OBJECT": lambda self: self._parse_json_object(),
        "LOG": lambda self: self._parse_logarithm(),
        "MATCH": lambda self: self._parse_match_against(),
        "OPENJSON": lambda self: self._parse_open_json(),
        "POSITION": lambda self: self._parse_position(),
        "SAFE_CAST": lambda self: self._parse_cast(False),
        "STRING_AGG": lambda self: self._parse_string_agg(),
        "SUBSTRING": lambda self: self._parse_substring(),
        "TRIM": lambda self: self._parse_trim(),
        "TRY_CAST": lambda self: self._parse_cast(False),
        "TRY_CONVERT": lambda self: self._parse_convert(False),
    }

    QUERY_MODIFIER_PARSERS = {
        TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()),
        TokenType.WHERE: lambda self: ("where", self._parse_where()),
        TokenType.GROUP_BY: lambda self: ("group", self._parse_group()),
        TokenType.HAVING: lambda self: ("having", self._parse_having()),
        TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()),
        TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()),
        TokenType.ORDER_BY: lambda self: ("order", self._parse_order()),
        TokenType.LIMIT: lambda self: ("limit", self._parse_limit()),
        TokenType.FETCH: lambda self: ("limit", self._parse_limit()),
        TokenType.OFFSET: lambda self: ("offset", self._parse_offset()),
        TokenType.FOR: lambda self: ("locks", self._parse_locks()),
        TokenType.LOCK: lambda self: ("locks", self._parse_locks()),
        TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)),
        TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)),
        TokenType.CLUSTER_BY: lambda self: (
            "cluster",
            self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
        ),
        TokenType.DISTRIBUTE_BY: lambda self: (
            "distribute",
            self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY),
        ),
        TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)),
    }

    SET_PARSERS = {
        "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"),
        "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"),
        "SESSION": lambda self: self._parse_set_item_assignment("SESSION"),
        "TRANSACTION": lambda self: self._parse_set_transaction(),
    }

    SHOW_PARSERS: t.Dict[str, t.Callable] = {}
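    # FUNCTION_PARSERS above covers calls whose argument syntax is not a plain
    # comma-separated list, e.g. CAST(x AS INT), EXTRACT(year FROM d), or
    # TRIM(BOTH 'x' FROM y) with its TRIM_TYPES qualifiers; SAFE_CAST,
    # TRY_CAST and TRY_CONVERT reuse _parse_cast/_parse_convert with a False
    # strictness flag so failed casts surface as exp.TryCast.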
    TYPE_LITERAL_PARSERS: t.Dict[exp.DataType.Type, t.Callable] = {}

    MODIFIABLES = (exp.Subquery, exp.Subqueryable, exp.Table)

    DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN}

    PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE}

    TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"}
    TRANSACTION_CHARACTERISTICS = {
        "ISOLATION LEVEL REPEATABLE READ",
        "ISOLATION LEVEL READ COMMITTED",
        "ISOLATION LEVEL READ UNCOMMITTED",
        "ISOLATION LEVEL SERIALIZABLE",
        "READ WRITE",
        "READ ONLY",
    }

    INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"}

    CLONE_KINDS = {"TIMESTAMP", "OFFSET", "STATEMENT"}

    TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE}

    WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS}
    WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER}
    WINDOW_SIDES = {"FOLLOWING", "PRECEDING"}

    ADD_CONSTRAINT_TOKENS = {TokenType.CONSTRAINT, TokenType.PRIMARY_KEY, TokenType.FOREIGN_KEY}

    STRICT_CAST = True

    # A NULL arg in CONCAT yields NULL by default
    CONCAT_NULL_OUTPUTS_STRING = False

    PREFIXED_PIVOT_COLUMNS = False
    IDENTIFY_PIVOT_STRINGS = False

    LOG_BASE_FIRST = True
    LOG_DEFAULTS_TO_LN = False

    __slots__ = (
        "error_level",
        "error_message_context",
        "max_errors",
        "sql",
        "errors",
        "_tokens",
        "_index",
        "_curr",
        "_next",
        "_prev",
        "_prev_comments",
    )

    # Autofilled
    INDEX_OFFSET: int = 0
    UNNEST_COLUMN_ONLY: bool = False
    ALIAS_POST_TABLESAMPLE: bool = False
    STRICT_STRING_CONCAT = False
    NULL_ORDERING: str = "nulls_are_small"
    SHOW_TRIE: t.Dict = {}
    SET_TRIE: t.Dict = {}
    FORMAT_MAPPING: t.Dict[str, str] = {}
    FORMAT_TRIE: t.Dict = {}
    TIME_MAPPING: t.Dict[str, str] = {}
    TIME_TRIE: t.Dict = {}

    def __init__(
        self,
        error_level: t.Optional[ErrorLevel] = None,
        error_message_context: int = 100,
        max_errors: int = 3,
    ):
        self.error_level = error_level or ErrorLevel.IMMEDIATE
        self.error_message_context = error_message_context
        self.max_errors = max_errors
        self.reset()

    def reset(self):
        self.sql = ""
        self.errors = []
        self._tokens = []
        self._index = 0
        self._curr = None
        self._next = None
        self._prev = None
        self._prev_comments = None

    def parse(
        self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens and returns a list of syntax trees, one tree
        per parsed SQL statement.

        Args:
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The list of the produced syntax trees.
        """
        return self._parse(
            parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
        )
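    # Typical entry point (a minimal usage sketch; the SQL text is a stand-in):
    #
    #   >>> from sqlglot.tokens import Tokenizer
    #   >>> sql = "SELECT a FROM b; SELECT 1"
    #   >>> Parser().parse(Tokenizer().tokenize(sql), sql)
    #   [Select(...), Select(...)]
    #
    # Each semicolon-separated statement yields one tree in the result list.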
    def parse_into(
        self,
        expression_types: exp.IntoType,
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens into a given Expression type. If a collection of Expression
        types is given instead, this method will try to parse the token list into each one
        of them, stopping at the first for which the parsing succeeds.

        Args:
            expression_types: The expression type(s) to try and parse the token list into.
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The target Expression.
        """
        errors = []
        for expression_type in ensure_list(expression_types):
            parser = self.EXPRESSION_PARSERS.get(expression_type)
            if not parser:
                raise TypeError(f"No parser registered for {expression_type}")

            try:
                return self._parse(parser, raw_tokens, sql)
            except ParseError as e:
                e.errors[0]["into_expression"] = expression_type
                errors.append(e)

        raise ParseError(
            f"Failed to parse '{sql or raw_tokens}' into {expression_types}",
            errors=merge_errors(errors),
        ) from errors[-1]

    def _parse(
        self,
        parse_method: t.Callable[[Parser], t.Optional[exp.Expression]],
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        self.reset()
        self.sql = sql or ""

        total = len(raw_tokens)
        chunks: t.List[t.List[Token]] = [[]]

        for i, token in enumerate(raw_tokens):
            if token.token_type == TokenType.SEMICOLON:
                if i < total - 1:
                    chunks.append([])
            else:
                chunks[-1].append(token)

        expressions = []

        for tokens in chunks:
            self._index = -1
            self._tokens = tokens
            self._advance()

            expressions.append(parse_method(self))

            if self._index < len(self._tokens):
                self.raise_error("Invalid expression / Unexpected token")

            self.check_errors()

        return expressions

    def check_errors(self) -> None:
        """Logs or raises any found errors, depending on the chosen error level setting."""
        if self.error_level == ErrorLevel.WARN:
            for error in self.errors:
                logger.error(str(error))
        elif self.error_level == ErrorLevel.RAISE and self.errors:
            raise ParseError(
                concat_messages(self.errors, self.max_errors),
                errors=merge_errors(self.errors),
            )

    def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
        """
        Appends an error in the list of recorded errors or raises it, depending on the chosen
        error level setting.
        """
        token = token or self._curr or self._prev or Token.string("")
        start = token.start
        end = token.end + 1
        start_context = self.sql[max(start - self.error_message_context, 0) : start]
        highlight = self.sql[start:end]
        end_context = self.sql[end : end + self.error_message_context]

        error = ParseError.new(
            f"{message}. Line {token.line}, Col: {token.col}.\n"
            f"  {start_context}\033[4m{highlight}\033[0m{end_context}",
            description=message,
            line=token.line,
            col=token.col,
            start_context=start_context,
            highlight=highlight,
            end_context=end_context,
        )

        if self.error_level == ErrorLevel.IMMEDIATE:
            raise error

        self.errors.append(error)
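    # Error-level behavior in brief (sketch, following check_errors and
    # raise_error above): IMMEDIATE raises on the first error inside
    # raise_error; RAISE accumulates errors and check_errors raises a single
    # ParseError carrying up to max_errors messages; WARN logs each error;
    # IGNORE lets validation pass silently.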
1005 """ 1006 instance = exp_class(**kwargs) 1007 instance.add_comments(comments) if comments else self._add_comments(instance) 1008 return self.validate_expression(instance) 1009 1010 def _add_comments(self, expression: t.Optional[exp.Expression]) -> None: 1011 if expression and self._prev_comments: 1012 expression.add_comments(self._prev_comments) 1013 self._prev_comments = None 1014 1015 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1016 """ 1017 Validates an Expression, making sure that all its mandatory arguments are set. 1018 1019 Args: 1020 expression: The expression to validate. 1021 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1022 1023 Returns: 1024 The validated expression. 1025 """ 1026 if self.error_level != ErrorLevel.IGNORE: 1027 for error_message in expression.error_messages(args): 1028 self.raise_error(error_message) 1029 1030 return expression 1031 1032 def _find_sql(self, start: Token, end: Token) -> str: 1033 return self.sql[start.start : end.end + 1] 1034 1035 def _advance(self, times: int = 1) -> None: 1036 self._index += times 1037 self._curr = seq_get(self._tokens, self._index) 1038 self._next = seq_get(self._tokens, self._index + 1) 1039 1040 if self._index > 0: 1041 self._prev = self._tokens[self._index - 1] 1042 self._prev_comments = self._prev.comments 1043 else: 1044 self._prev = None 1045 self._prev_comments = None 1046 1047 def _retreat(self, index: int) -> None: 1048 if index != self._index: 1049 self._advance(index - self._index) 1050 1051 def _parse_command(self) -> exp.Command: 1052 return self.expression(exp.Command, this=self._prev.text, expression=self._parse_string()) 1053 1054 def _parse_comment(self, allow_exists: bool = True) -> exp.Expression: 1055 start = self._prev 1056 exists = self._parse_exists() if allow_exists else None 1057 1058 self._match(TokenType.ON) 1059 1060 kind = self._match_set(self.CREATABLES) and self._prev 1061 if not kind: 1062 return self._parse_as_command(start) 1063 1064 if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1065 this = self._parse_user_defined_function(kind=kind.token_type) 1066 elif kind.token_type == TokenType.TABLE: 1067 this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS) 1068 elif kind.token_type == TokenType.COLUMN: 1069 this = self._parse_column() 1070 else: 1071 this = self._parse_id_var() 1072 1073 self._match(TokenType.IS) 1074 1075 return self.expression( 1076 exp.Comment, this=this, kind=kind.text, expression=self._parse_string(), exists=exists 1077 ) 1078 1079 def _parse_to_table( 1080 self, 1081 ) -> exp.ToTableProperty: 1082 table = self._parse_table_parts(schema=True) 1083 return self.expression(exp.ToTableProperty, this=table) 1084 1085 # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl 1086 def _parse_ttl(self) -> exp.Expression: 1087 def _parse_ttl_action() -> t.Optional[exp.Expression]: 1088 this = self._parse_bitwise() 1089 1090 if self._match_text_seq("DELETE"): 1091 return self.expression(exp.MergeTreeTTLAction, this=this, delete=True) 1092 if self._match_text_seq("RECOMPRESS"): 1093 return self.expression( 1094 exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise() 1095 ) 1096 if self._match_text_seq("TO", "DISK"): 1097 return self.expression( 1098 exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string() 1099 ) 1100 if self._match_text_seq("TO", "VOLUME"): 1101 return self.expression( 1102 
    # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl
    def _parse_ttl(self) -> exp.Expression:
        def _parse_ttl_action() -> t.Optional[exp.Expression]:
            this = self._parse_bitwise()

            if self._match_text_seq("DELETE"):
                return self.expression(exp.MergeTreeTTLAction, this=this, delete=True)
            if self._match_text_seq("RECOMPRESS"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise()
                )
            if self._match_text_seq("TO", "DISK"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string()
                )
            if self._match_text_seq("TO", "VOLUME"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string()
                )

            return this

        expressions = self._parse_csv(_parse_ttl_action)
        where = self._parse_where()
        group = self._parse_group()

        aggregates = None
        if group and self._match(TokenType.SET):
            aggregates = self._parse_csv(self._parse_set_item)

        return self.expression(
            exp.MergeTreeTTL,
            expressions=expressions,
            where=where,
            group=group,
            aggregates=aggregates,
        )

    def _parse_statement(self) -> t.Optional[exp.Expression]:
        if self._curr is None:
            return None

        if self._match_set(self.STATEMENT_PARSERS):
            return self.STATEMENT_PARSERS[self._prev.token_type](self)

        if self._match_set(Tokenizer.COMMANDS):
            return self._parse_command()

        expression = self._parse_expression()
        expression = self._parse_set_operations(expression) if expression else self._parse_select()
        return self._parse_query_modifiers(expression)

    def _parse_drop(self) -> exp.Drop | exp.Command:
        start = self._prev
        temporary = self._match(TokenType.TEMPORARY)
        materialized = self._match_text_seq("MATERIALIZED")

        kind = self._match_set(self.CREATABLES) and self._prev.text
        if not kind:
            return self._parse_as_command(start)

        return self.expression(
            exp.Drop,
            comments=start.comments,
            exists=self._parse_exists(),
            this=self._parse_table(schema=True),
            kind=kind,
            temporary=temporary,
            materialized=materialized,
            cascade=self._match_text_seq("CASCADE"),
            constraints=self._match_text_seq("CONSTRAINTS"),
            purge=self._match_text_seq("PURGE"),
        )

    def _parse_exists(self, not_: bool = False) -> t.Optional[bool]:
        return (
            self._match(TokenType.IF)
            and (not not_ or self._match(TokenType.NOT))
            and self._match(TokenType.EXISTS)
        )
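    # Sketch of the drop/exists interplay: for `DROP TABLE IF EXISTS t CASCADE`,
    # _parse_drop consumes the CREATABLES kind ("TABLE"), _parse_exists eats
    # "IF EXISTS" and returns True, and the trailing CASCADE is captured by
    # _match_text_seq, producing exp.Drop(kind="TABLE", exists=True, cascade=True).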
self._match_text_seq("RETURN") 1212 expression = self._parse_statement() 1213 1214 if return_: 1215 expression = self.expression(exp.Return, this=expression) 1216 elif create_token.token_type == TokenType.INDEX: 1217 this = self._parse_index(index=self._parse_id_var()) 1218 elif create_token.token_type in self.DB_CREATABLES: 1219 table_parts = self._parse_table_parts(schema=True) 1220 1221 # exp.Properties.Location.POST_NAME 1222 self._match(TokenType.COMMA) 1223 extend_props(self._parse_properties(before=True)) 1224 1225 this = self._parse_schema(this=table_parts) 1226 1227 # exp.Properties.Location.POST_SCHEMA and POST_WITH 1228 extend_props(self._parse_properties()) 1229 1230 self._match(TokenType.ALIAS) 1231 if not self._match_set(self.DDL_SELECT_TOKENS, advance=False): 1232 # exp.Properties.Location.POST_ALIAS 1233 extend_props(self._parse_properties()) 1234 1235 expression = self._parse_ddl_select() 1236 1237 if create_token.token_type == TokenType.TABLE: 1238 # exp.Properties.Location.POST_EXPRESSION 1239 extend_props(self._parse_properties()) 1240 1241 indexes = [] 1242 while True: 1243 index = self._parse_index() 1244 1245 # exp.Properties.Location.POST_INDEX 1246 extend_props(self._parse_properties()) 1247 1248 if not index: 1249 break 1250 else: 1251 self._match(TokenType.COMMA) 1252 indexes.append(index) 1253 elif create_token.token_type == TokenType.VIEW: 1254 if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"): 1255 no_schema_binding = True 1256 1257 if self._match_text_seq("CLONE"): 1258 clone = self._parse_table(schema=True) 1259 when = self._match_texts({"AT", "BEFORE"}) and self._prev.text.upper() 1260 clone_kind = ( 1261 self._match(TokenType.L_PAREN) 1262 and self._match_texts(self.CLONE_KINDS) 1263 and self._prev.text.upper() 1264 ) 1265 clone_expression = self._match(TokenType.FARROW) and self._parse_bitwise() 1266 self._match(TokenType.R_PAREN) 1267 clone = self.expression( 1268 exp.Clone, this=clone, when=when, kind=clone_kind, expression=clone_expression 1269 ) 1270 1271 return self.expression( 1272 exp.Create, 1273 this=this, 1274 kind=create_token.text, 1275 replace=replace, 1276 unique=unique, 1277 expression=expression, 1278 exists=exists, 1279 properties=properties, 1280 indexes=indexes, 1281 no_schema_binding=no_schema_binding, 1282 begin=begin, 1283 clone=clone, 1284 ) 1285 1286 def _parse_property_before(self) -> t.Optional[exp.Expression]: 1287 # only used for teradata currently 1288 self._match(TokenType.COMMA) 1289 1290 kwargs = { 1291 "no": self._match_text_seq("NO"), 1292 "dual": self._match_text_seq("DUAL"), 1293 "before": self._match_text_seq("BEFORE"), 1294 "default": self._match_text_seq("DEFAULT"), 1295 "local": (self._match_text_seq("LOCAL") and "LOCAL") 1296 or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"), 1297 "after": self._match_text_seq("AFTER"), 1298 "minimum": self._match_texts(("MIN", "MINIMUM")), 1299 "maximum": self._match_texts(("MAX", "MAXIMUM")), 1300 } 1301 1302 if self._match_texts(self.PROPERTY_PARSERS): 1303 parser = self.PROPERTY_PARSERS[self._prev.text.upper()] 1304 try: 1305 return parser(self, **{k: v for k, v in kwargs.items() if v}) 1306 except TypeError: 1307 self.raise_error(f"Cannot parse property '{self._prev.text}'") 1308 1309 return None 1310 1311 def _parse_property(self) -> t.Optional[exp.Expression]: 1312 if self._match_texts(self.PROPERTY_PARSERS): 1313 return self.PROPERTY_PARSERS[self._prev.text.upper()](self) 1314 1315 if self._match_pair(TokenType.DEFAULT, TokenType.CHARACTER_SET): 1316 return 
    def _parse_property(self) -> t.Optional[exp.Expression]:
        if self._match_texts(self.PROPERTY_PARSERS):
            return self.PROPERTY_PARSERS[self._prev.text.upper()](self)

        if self._match_pair(TokenType.DEFAULT, TokenType.CHARACTER_SET):
            return self._parse_character_set(default=True)

        if self._match_text_seq("COMPOUND", "SORTKEY"):
            return self._parse_sortkey(compound=True)

        if self._match_text_seq("SQL", "SECURITY"):
            return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER"))

        assignment = self._match_pair(
            TokenType.VAR, TokenType.EQ, advance=False
        ) or self._match_pair(TokenType.STRING, TokenType.EQ, advance=False)

        if assignment:
            key = self._parse_var_or_string()
            self._match(TokenType.EQ)
            return self.expression(exp.Property, this=key, value=self._parse_column())

        return None

    def _parse_stored(self) -> exp.FileFormatProperty:
        self._match(TokenType.ALIAS)

        input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None
        output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None

        return self.expression(
            exp.FileFormatProperty,
            this=self.expression(
                exp.InputOutputFormat, input_format=input_format, output_format=output_format
            )
            if input_format or output_format
            else self._parse_var_or_string() or self._parse_number() or self._parse_id_var(),
        )

    def _parse_property_assignment(self, exp_class: t.Type[E]) -> E:
        self._match(TokenType.EQ)
        self._match(TokenType.ALIAS)
        return self.expression(exp_class, this=self._parse_field())

    def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]:
        properties = []
        while True:
            if before:
                prop = self._parse_property_before()
            else:
                prop = self._parse_property()

            if not prop:
                break
            for p in ensure_list(prop):
                properties.append(p)

        if properties:
            return self.expression(exp.Properties, expressions=properties)

        return None

    def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty:
        return self.expression(
            exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION")
        )

    def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty:
        if self._index >= 2:
            pre_volatile_token = self._tokens[self._index - 2]
        else:
            pre_volatile_token = None

        if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS:
            return exp.VolatileProperty()

        return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE"))

    def _parse_with_property(
        self,
    ) -> t.Optional[exp.Expression] | t.List[t.Optional[exp.Expression]]:
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_property)

        if self._match_text_seq("JOURNAL"):
            return self._parse_withjournaltable()

        if self._match_text_seq("DATA"):
            return self._parse_withdata(no=False)
        elif self._match_text_seq("NO", "DATA"):
            return self._parse_withdata(no=True)

        if not self._next:
            return None

        return self._parse_withisolatedloading()
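    # Property parsing sketch: a clause like `ENGINE=InnoDB` is dispatched via
    # PROPERTY_PARSERS to _parse_property_assignment(exp.EngineProperty), which
    # skips an optional "=" or alias token and parses the value as a field;
    # _parse_properties keeps looping until no parser matches, collecting all
    # results into a single exp.Properties node.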
    # https://dev.mysql.com/doc/refman/8.0/en/create-view.html
    def _parse_definer(self) -> t.Optional[exp.DefinerProperty]:
        self._match(TokenType.EQ)

        user = self._parse_id_var()
        self._match(TokenType.PARAMETER)
        host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text)

        if not user or not host:
            return None

        return exp.DefinerProperty(this=f"{user}@{host}")

    def _parse_withjournaltable(self) -> exp.WithJournalTableProperty:
        self._match(TokenType.TABLE)
        self._match(TokenType.EQ)
        return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts())

    def _parse_log(self, no: bool = False) -> exp.LogProperty:
        return self.expression(exp.LogProperty, no=no)

    def _parse_journal(self, **kwargs) -> exp.JournalProperty:
        return self.expression(exp.JournalProperty, **kwargs)

    def _parse_checksum(self) -> exp.ChecksumProperty:
        self._match(TokenType.EQ)

        on = None
        if self._match(TokenType.ON):
            on = True
        elif self._match_text_seq("OFF"):
            on = False

        return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT))

    def _parse_cluster(self) -> exp.Cluster:
        return self.expression(exp.Cluster, expressions=self._parse_csv(self._parse_ordered))

    def _parse_clustered_by(self) -> exp.ClusteredByProperty:
        self._match_text_seq("BY")

        self._match_l_paren()
        expressions = self._parse_csv(self._parse_column)
        self._match_r_paren()

        if self._match_text_seq("SORTED", "BY"):
            self._match_l_paren()
            sorted_by = self._parse_csv(self._parse_ordered)
            self._match_r_paren()
        else:
            sorted_by = None

        self._match(TokenType.INTO)
        buckets = self._parse_number()
        self._match_text_seq("BUCKETS")

        return self.expression(
            exp.ClusteredByProperty,
            expressions=expressions,
            sorted_by=sorted_by,
            buckets=buckets,
        )

    def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]:
        if not self._match_text_seq("GRANTS"):
            self._retreat(self._index - 1)
            return None

        return self.expression(exp.CopyGrantsProperty)

    def _parse_freespace(self) -> exp.FreespaceProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT)
        )

    def _parse_mergeblockratio(
        self, no: bool = False, default: bool = False
    ) -> exp.MergeBlockRatioProperty:
        if self._match(TokenType.EQ):
            return self.expression(
                exp.MergeBlockRatioProperty,
                this=self._parse_number(),
                percent=self._match(TokenType.PERCENT),
            )

        return self.expression(exp.MergeBlockRatioProperty, no=no, default=default)

    def _parse_datablocksize(
        self,
        default: t.Optional[bool] = None,
        minimum: t.Optional[bool] = None,
        maximum: t.Optional[bool] = None,
    ) -> exp.DataBlocksizeProperty:
        self._match(TokenType.EQ)
        size = self._parse_number()

        units = None
        if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")):
            units = self._prev.text

        return self.expression(
            exp.DataBlocksizeProperty,
            size=size,
            units=units,
            default=default,
            minimum=minimum,
            maximum=maximum,
        )
    def _parse_blockcompression(self) -> exp.BlockCompressionProperty:
        self._match(TokenType.EQ)
        always = self._match_text_seq("ALWAYS")
        manual = self._match_text_seq("MANUAL")
        never = self._match_text_seq("NEVER")
        default = self._match_text_seq("DEFAULT")

        autotemp = None
        if self._match_text_seq("AUTOTEMP"):
            autotemp = self._parse_schema()

        return self.expression(
            exp.BlockCompressionProperty,
            always=always,
            manual=manual,
            never=never,
            default=default,
            autotemp=autotemp,
        )

    def _parse_withisolatedloading(self) -> exp.IsolatedLoadingProperty:
        no = self._match_text_seq("NO")
        concurrent = self._match_text_seq("CONCURRENT")
        self._match_text_seq("ISOLATED", "LOADING")
        for_all = self._match_text_seq("FOR", "ALL")
        for_insert = self._match_text_seq("FOR", "INSERT")
        for_none = self._match_text_seq("FOR", "NONE")
        return self.expression(
            exp.IsolatedLoadingProperty,
            no=no,
            concurrent=concurrent,
            for_all=for_all,
            for_insert=for_insert,
            for_none=for_none,
        )

    def _parse_locking(self) -> exp.LockingProperty:
        if self._match(TokenType.TABLE):
            kind = "TABLE"
        elif self._match(TokenType.VIEW):
            kind = "VIEW"
        elif self._match(TokenType.ROW):
            kind = "ROW"
        elif self._match_text_seq("DATABASE"):
            kind = "DATABASE"
        else:
            kind = None

        if kind in ("DATABASE", "TABLE", "VIEW"):
            this = self._parse_table_parts()
        else:
            this = None

        if self._match(TokenType.FOR):
            for_or_in = "FOR"
        elif self._match(TokenType.IN):
            for_or_in = "IN"
        else:
            for_or_in = None

        if self._match_text_seq("ACCESS"):
            lock_type = "ACCESS"
        elif self._match_texts(("EXCL", "EXCLUSIVE")):
            lock_type = "EXCLUSIVE"
        elif self._match_text_seq("SHARE"):
            lock_type = "SHARE"
        elif self._match_text_seq("READ"):
            lock_type = "READ"
        elif self._match_text_seq("WRITE"):
            lock_type = "WRITE"
        elif self._match_text_seq("CHECKSUM"):
            lock_type = "CHECKSUM"
        else:
            lock_type = None

        override = self._match_text_seq("OVERRIDE")

        return self.expression(
            exp.LockingProperty,
            this=this,
            kind=kind,
            for_or_in=for_or_in,
            lock_type=lock_type,
            override=override,
        )

    def _parse_partition_by(self) -> t.List[t.Optional[exp.Expression]]:
        if self._match(TokenType.PARTITION_BY):
            return self._parse_csv(self._parse_conjunction)
        return []

    def _parse_partitioned_by(self) -> exp.PartitionedByProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.PartitionedByProperty,
            this=self._parse_schema() or self._parse_bracket(self._parse_field()),
        )

    def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty:
        if self._match_text_seq("AND", "STATISTICS"):
            statistics = True
        elif self._match_text_seq("AND", "NO", "STATISTICS"):
            statistics = False
        else:
            statistics = None

        return self.expression(exp.WithDataProperty, no=no, statistics=statistics)

    def _parse_no_property(self) -> t.Optional[exp.NoPrimaryIndexProperty]:
        if self._match_text_seq("PRIMARY", "INDEX"):
            return exp.NoPrimaryIndexProperty()
        return None

    def _parse_on_property(self) -> t.Optional[exp.Expression]:
        if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"):
            return exp.OnCommitProperty()
        elif self._match_text_seq("COMMIT", "DELETE", "ROWS"):
            return exp.OnCommitProperty(delete=True)
        return None

    def _parse_distkey(self) -> exp.DistKeyProperty:
        return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var))
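    # Teradata locking sketch: `LOCKING ROW FOR ACCESS` walks through
    # _parse_locking as kind="ROW" (no table parts, since only DATABASE, TABLE
    # and VIEW take a name), for_or_in="FOR", lock_type="ACCESS", and
    # override=False.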
    def _parse_create_like(self) -> t.Optional[exp.LikeProperty]:
        table = self._parse_table(schema=True)

        options = []
        while self._match_texts(("INCLUDING", "EXCLUDING")):
            this = self._prev.text.upper()

            id_var = self._parse_id_var()
            if not id_var:
                return None

            options.append(
                self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper()))
            )

        return self.expression(exp.LikeProperty, this=table, expressions=options)

    def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty:
        return self.expression(
            exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound
        )

    def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default
        )

    def _parse_returns(self) -> exp.ReturnsProperty:
        value: t.Optional[exp.Expression]
        is_table = self._match(TokenType.TABLE)

        if is_table:
            if self._match(TokenType.LT):
                value = self.expression(
                    exp.Schema,
                    this="TABLE",
                    expressions=self._parse_csv(self._parse_struct_types),
                )
                if not self._match(TokenType.GT):
                    self.raise_error("Expecting >")
            else:
                value = self._parse_schema(exp.var("TABLE"))
        else:
            value = self._parse_types()

        return self.expression(exp.ReturnsProperty, this=value, is_table=is_table)

    def _parse_describe(self) -> exp.Describe:
        kind = self._match_set(self.CREATABLES) and self._prev.text
        this = self._parse_table()
        return self.expression(exp.Describe, this=this, kind=kind)

    def _parse_insert(self) -> exp.Insert:
        comments = ensure_list(self._prev_comments)
        overwrite = self._match(TokenType.OVERWRITE)
        ignore = self._match(TokenType.IGNORE)
        local = self._match_text_seq("LOCAL")
        alternative = None

        if self._match_text_seq("DIRECTORY"):
            this: t.Optional[exp.Expression] = self.expression(
                exp.Directory,
                this=self._parse_var_or_string(),
                local=local,
                row_format=self._parse_row_format(match_row=True),
            )
        else:
            if self._match(TokenType.OR):
                alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text

            self._match(TokenType.INTO)
            comments += ensure_list(self._prev_comments)
            self._match(TokenType.TABLE)
            this = self._parse_table(schema=True)

        returning = self._parse_returning()

        return self.expression(
            exp.Insert,
            comments=comments,
            this=this,
            exists=self._parse_exists(),
            partition=self._parse_partition(),
            where=self._match_pair(TokenType.REPLACE, TokenType.WHERE)
            and self._parse_conjunction(),
            expression=self._parse_ddl_select(),
            conflict=self._parse_on_conflict(),
            returning=returning or self._parse_returning(),
            overwrite=overwrite,
            alternative=alternative,
            ignore=ignore,
        )
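    # Insert alternatives sketch: in `INSERT OR REPLACE INTO t VALUES (1)`,
    # the OR token triggers a lookup against INSERT_ALTERNATIVES, so the
    # resulting exp.Insert carries alternative="REPLACE"; dialects without
    # this SQLite-style syntax simply never take the OR branch.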
self._match(TokenType.SET) 1759 expressions = self._parse_csv(self._parse_equality) 1760 1761 return self.expression( 1762 exp.OnConflict, 1763 duplicate=duplicate, 1764 expressions=expressions, 1765 nothing=nothing, 1766 key=key, 1767 constraint=constraint, 1768 ) 1769 1770 def _parse_returning(self) -> t.Optional[exp.Returning]: 1771 if not self._match(TokenType.RETURNING): 1772 return None 1773 return self.expression( 1774 exp.Returning, 1775 expressions=self._parse_csv(self._parse_expression), 1776 into=self._match(TokenType.INTO) and self._parse_table_part(), 1777 ) 1778 1779 def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 1780 if not self._match(TokenType.FORMAT): 1781 return None 1782 return self._parse_row_format() 1783 1784 def _parse_row_format( 1785 self, match_row: bool = False 1786 ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 1787 if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT): 1788 return None 1789 1790 if self._match_text_seq("SERDE"): 1791 this = self._parse_string() 1792 1793 serde_properties = None 1794 if self._match(TokenType.SERDE_PROPERTIES): 1795 serde_properties = self.expression( 1796 exp.SerdeProperties, expressions=self._parse_wrapped_csv(self._parse_property) 1797 ) 1798 1799 return self.expression( 1800 exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties 1801 ) 1802 1803 self._match_text_seq("DELIMITED") 1804 1805 kwargs = {} 1806 1807 if self._match_text_seq("FIELDS", "TERMINATED", "BY"): 1808 kwargs["fields"] = self._parse_string() 1809 if self._match_text_seq("ESCAPED", "BY"): 1810 kwargs["escaped"] = self._parse_string() 1811 if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"): 1812 kwargs["collection_items"] = self._parse_string() 1813 if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"): 1814 kwargs["map_keys"] = self._parse_string() 1815 if self._match_text_seq("LINES", "TERMINATED", "BY"): 1816 kwargs["lines"] = self._parse_string() 1817 if self._match_text_seq("NULL", "DEFINED", "AS"): 1818 kwargs["null"] = self._parse_string() 1819 1820 return self.expression(exp.RowFormatDelimitedProperty, **kwargs) # type: ignore 1821 1822 def _parse_load(self) -> exp.LoadData | exp.Command: 1823 if self._match_text_seq("DATA"): 1824 local = self._match_text_seq("LOCAL") 1825 self._match_text_seq("INPATH") 1826 inpath = self._parse_string() 1827 overwrite = self._match(TokenType.OVERWRITE) 1828 self._match_pair(TokenType.INTO, TokenType.TABLE) 1829 1830 return self.expression( 1831 exp.LoadData, 1832 this=self._parse_table(schema=True), 1833 local=local, 1834 overwrite=overwrite, 1835 inpath=inpath, 1836 partition=self._parse_partition(), 1837 input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(), 1838 serde=self._match_text_seq("SERDE") and self._parse_string(), 1839 ) 1840 return self._parse_as_command(self._prev) 1841 1842 def _parse_delete(self) -> exp.Delete: 1843 # This handles MySQL's "Multiple-Table Syntax" 1844 # https://dev.mysql.com/doc/refman/8.0/en/delete.html 1845 tables = None 1846 comments = self._prev_comments 1847 if not self._match(TokenType.FROM, advance=False): 1848 tables = self._parse_csv(self._parse_table) or None 1849 1850 returning = self._parse_returning() 1851 1852 return self.expression( 1853 exp.Delete, 1854 comments=comments, 1855 tables=tables, 1856 this=self._match(TokenType.FROM) and self._parse_table(joins=True), 1857 using=self._match(TokenType.USING) and 
self._parse_table(joins=True), 1858 where=self._parse_where(), 1859 returning=returning or self._parse_returning(), 1860 limit=self._parse_limit(), 1861 ) 1862 1863 def _parse_update(self) -> exp.Update: 1864 comments = self._prev_comments 1865 this = self._parse_table(alias_tokens=self.UPDATE_ALIAS_TOKENS) 1866 expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality) 1867 returning = self._parse_returning() 1868 return self.expression( 1869 exp.Update, 1870 comments=comments, 1871 **{ # type: ignore 1872 "this": this, 1873 "expressions": expressions, 1874 "from": self._parse_from(joins=True), 1875 "where": self._parse_where(), 1876 "returning": returning or self._parse_returning(), 1877 "limit": self._parse_limit(), 1878 }, 1879 ) 1880 1881 def _parse_uncache(self) -> exp.Uncache: 1882 if not self._match(TokenType.TABLE): 1883 self.raise_error("Expecting TABLE after UNCACHE") 1884 1885 return self.expression( 1886 exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True) 1887 ) 1888 1889 def _parse_cache(self) -> exp.Cache: 1890 lazy = self._match_text_seq("LAZY") 1891 self._match(TokenType.TABLE) 1892 table = self._parse_table(schema=True) 1893 1894 options = [] 1895 if self._match_text_seq("OPTIONS"): 1896 self._match_l_paren() 1897 k = self._parse_string() 1898 self._match(TokenType.EQ) 1899 v = self._parse_string() 1900 options = [k, v] 1901 self._match_r_paren() 1902 1903 self._match(TokenType.ALIAS) 1904 return self.expression( 1905 exp.Cache, 1906 this=table, 1907 lazy=lazy, 1908 options=options, 1909 expression=self._parse_select(nested=True), 1910 ) 1911 1912 def _parse_partition(self) -> t.Optional[exp.Partition]: 1913 if not self._match(TokenType.PARTITION): 1914 return None 1915 1916 return self.expression( 1917 exp.Partition, expressions=self._parse_wrapped_csv(self._parse_conjunction) 1918 ) 1919 1920 def _parse_value(self) -> exp.Tuple: 1921 if self._match(TokenType.L_PAREN): 1922 expressions = self._parse_csv(self._parse_conjunction) 1923 self._match_r_paren() 1924 return self.expression(exp.Tuple, expressions=expressions) 1925 1926 # In presto we can have VALUES 1, 2 which results in 1 column & 2 rows. 
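# For example, "VALUES 1, 2" yields two one-column rows (two exp.Tuple nodes
# with a single expression each), whereas "VALUES (1, 2)" yields one
# two-column row; the fallback below wraps the lone expression in a Tuple so
# both shapes produce uniform rows (note added for exposition; see the link
# that follows).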
1927 # https://prestodb.io/docs/current/sql/values.html 1928 return self.expression(exp.Tuple, expressions=[self._parse_conjunction()]) 1929 1930 def _parse_select( 1931 self, nested: bool = False, table: bool = False, parse_subquery_alias: bool = True 1932 ) -> t.Optional[exp.Expression]: 1933 cte = self._parse_with() 1934 if cte: 1935 this = self._parse_statement() 1936 1937 if not this: 1938 self.raise_error("Failed to parse any statement following CTE") 1939 return cte 1940 1941 if "with" in this.arg_types: 1942 this.set("with", cte) 1943 else: 1944 self.raise_error(f"{this.key} does not support CTE") 1945 this = cte 1946 elif self._match(TokenType.SELECT): 1947 comments = self._prev_comments 1948 1949 hint = self._parse_hint() 1950 all_ = self._match(TokenType.ALL) 1951 distinct = self._match(TokenType.DISTINCT) 1952 1953 kind = ( 1954 self._match(TokenType.ALIAS) 1955 and self._match_texts(("STRUCT", "VALUE")) 1956 and self._prev.text 1957 ) 1958 1959 if distinct: 1960 distinct = self.expression( 1961 exp.Distinct, 1962 on=self._parse_value() if self._match(TokenType.ON) else None, 1963 ) 1964 1965 if all_ and distinct: 1966 self.raise_error("Cannot specify both ALL and DISTINCT after SELECT") 1967 1968 limit = self._parse_limit(top=True) 1969 expressions = self._parse_expressions() 1970 1971 this = self.expression( 1972 exp.Select, 1973 kind=kind, 1974 hint=hint, 1975 distinct=distinct, 1976 expressions=expressions, 1977 limit=limit, 1978 ) 1979 this.comments = comments 1980 1981 into = self._parse_into() 1982 if into: 1983 this.set("into", into) 1984 1985 from_ = self._parse_from() 1986 if from_: 1987 this.set("from", from_) 1988 1989 this = self._parse_query_modifiers(this) 1990 elif (table or nested) and self._match(TokenType.L_PAREN): 1991 if self._match(TokenType.PIVOT): 1992 this = self._parse_simplified_pivot() 1993 elif self._match(TokenType.FROM): 1994 this = exp.select("*").from_( 1995 t.cast(exp.From, self._parse_from(skip_from_token=True)) 1996 ) 1997 else: 1998 this = self._parse_table() if table else self._parse_select(nested=True) 1999 this = self._parse_set_operations(self._parse_query_modifiers(this)) 2000 2001 self._match_r_paren() 2002 2003 # We return early here so that the UNION isn't attached to the subquery by the 2004 # following call to _parse_set_operations, but instead becomes the parent node 2005 return self._parse_subquery(this, parse_alias=parse_subquery_alias) 2006 elif self._match(TokenType.VALUES): 2007 this = self.expression( 2008 exp.Values, 2009 expressions=self._parse_csv(self._parse_value), 2010 alias=self._parse_table_alias(), 2011 ) 2012 else: 2013 this = None 2014 2015 return self._parse_set_operations(this) 2016 2017 def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]: 2018 if not skip_with_token and not self._match(TokenType.WITH): 2019 return None 2020 2021 comments = self._prev_comments 2022 recursive = self._match(TokenType.RECURSIVE) 2023 2024 expressions = [] 2025 while True: 2026 expressions.append(self._parse_cte()) 2027 2028 if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH): 2029 break 2030 else: 2031 self._match(TokenType.WITH) 2032 2033 return self.expression( 2034 exp.With, comments=comments, expressions=expressions, recursive=recursive 2035 ) 2036 2037 def _parse_cte(self) -> exp.CTE: 2038 alias = self._parse_table_alias() 2039 if not alias or not alias.this: 2040 self.raise_error("Expected CTE to have alias") 2041 2042 self._match(TokenType.ALIAS) 2043 return self.expression( 2044 
exp.CTE, this=self._parse_wrapped(self._parse_statement), alias=alias 2045 ) 2046 2047 def _parse_table_alias( 2048 self, alias_tokens: t.Optional[t.Collection[TokenType]] = None 2049 ) -> t.Optional[exp.TableAlias]: 2050 any_token = self._match(TokenType.ALIAS) 2051 alias = ( 2052 self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 2053 or self._parse_string_as_identifier() 2054 ) 2055 2056 index = self._index 2057 if self._match(TokenType.L_PAREN): 2058 columns = self._parse_csv(self._parse_function_parameter) 2059 self._match_r_paren() if columns else self._retreat(index) 2060 else: 2061 columns = None 2062 2063 if not alias and not columns: 2064 return None 2065 2066 return self.expression(exp.TableAlias, this=alias, columns=columns) 2067 2068 def _parse_subquery( 2069 self, this: t.Optional[exp.Expression], parse_alias: bool = True 2070 ) -> t.Optional[exp.Subquery]: 2071 if not this: 2072 return None 2073 2074 return self.expression( 2075 exp.Subquery, 2076 this=this, 2077 pivots=self._parse_pivots(), 2078 alias=self._parse_table_alias() if parse_alias else None, 2079 ) 2080 2081 def _parse_query_modifiers( 2082 self, this: t.Optional[exp.Expression] 2083 ) -> t.Optional[exp.Expression]: 2084 if isinstance(this, self.MODIFIABLES): 2085 for join in iter(self._parse_join, None): 2086 this.append("joins", join) 2087 for lateral in iter(self._parse_lateral, None): 2088 this.append("laterals", lateral) 2089 2090 while True: 2091 if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False): 2092 parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type] 2093 key, expression = parser(self) 2094 2095 if expression: 2096 this.set(key, expression) 2097 if key == "limit": 2098 offset = expression.args.pop("offset", None) 2099 if offset: 2100 this.set("offset", exp.Offset(expression=offset)) 2101 continue 2102 break 2103 return this 2104 2105 def _parse_hint(self) -> t.Optional[exp.Hint]: 2106 if self._match(TokenType.HINT): 2107 hints = [] 2108 for hint in iter(lambda: self._parse_csv(self._parse_function), []): 2109 hints.extend(hint) 2110 2111 if not self._match_pair(TokenType.STAR, TokenType.SLASH): 2112 self.raise_error("Expected */ after HINT") 2113 2114 return self.expression(exp.Hint, expressions=hints) 2115 2116 return None 2117 2118 def _parse_into(self) -> t.Optional[exp.Into]: 2119 if not self._match(TokenType.INTO): 2120 return None 2121 2122 temp = self._match(TokenType.TEMPORARY) 2123 unlogged = self._match_text_seq("UNLOGGED") 2124 self._match(TokenType.TABLE) 2125 2126 return self.expression( 2127 exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged 2128 ) 2129 2130 def _parse_from( 2131 self, joins: bool = False, skip_from_token: bool = False 2132 ) -> t.Optional[exp.From]: 2133 if not skip_from_token and not self._match(TokenType.FROM): 2134 return None 2135 2136 return self.expression( 2137 exp.From, comments=self._prev_comments, this=self._parse_table(joins=joins) 2138 ) 2139 2140 def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]: 2141 if not self._match(TokenType.MATCH_RECOGNIZE): 2142 return None 2143 2144 self._match_l_paren() 2145 2146 partition = self._parse_partition_by() 2147 order = self._parse_order() 2148 measures = self._parse_expressions() if self._match_text_seq("MEASURES") else None 2149 2150 if self._match_text_seq("ONE", "ROW", "PER", "MATCH"): 2151 rows = exp.var("ONE ROW PER MATCH") 2152 elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"): 2153 text = "ALL ROWS PER 
MATCH" 2154 if self._match_text_seq("SHOW", "EMPTY", "MATCHES"): 2155 text += f" SHOW EMPTY MATCHES" 2156 elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"): 2157 text += f" OMIT EMPTY MATCHES" 2158 elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"): 2159 text += f" WITH UNMATCHED ROWS" 2160 rows = exp.var(text) 2161 else: 2162 rows = None 2163 2164 if self._match_text_seq("AFTER", "MATCH", "SKIP"): 2165 text = "AFTER MATCH SKIP" 2166 if self._match_text_seq("PAST", "LAST", "ROW"): 2167 text += f" PAST LAST ROW" 2168 elif self._match_text_seq("TO", "NEXT", "ROW"): 2169 text += f" TO NEXT ROW" 2170 elif self._match_text_seq("TO", "FIRST"): 2171 text += f" TO FIRST {self._advance_any().text}" # type: ignore 2172 elif self._match_text_seq("TO", "LAST"): 2173 text += f" TO LAST {self._advance_any().text}" # type: ignore 2174 after = exp.var(text) 2175 else: 2176 after = None 2177 2178 if self._match_text_seq("PATTERN"): 2179 self._match_l_paren() 2180 2181 if not self._curr: 2182 self.raise_error("Expecting )", self._curr) 2183 2184 paren = 1 2185 start = self._curr 2186 2187 while self._curr and paren > 0: 2188 if self._curr.token_type == TokenType.L_PAREN: 2189 paren += 1 2190 if self._curr.token_type == TokenType.R_PAREN: 2191 paren -= 1 2192 2193 end = self._prev 2194 self._advance() 2195 2196 if paren > 0: 2197 self.raise_error("Expecting )", self._curr) 2198 2199 pattern = exp.var(self._find_sql(start, end)) 2200 else: 2201 pattern = None 2202 2203 define = ( 2204 self._parse_csv( 2205 lambda: self.expression( 2206 exp.Alias, 2207 alias=self._parse_id_var(any_token=True), 2208 this=self._match(TokenType.ALIAS) and self._parse_conjunction(), 2209 ) 2210 ) 2211 if self._match_text_seq("DEFINE") 2212 else None 2213 ) 2214 2215 self._match_r_paren() 2216 2217 return self.expression( 2218 exp.MatchRecognize, 2219 partition_by=partition, 2220 order=order, 2221 measures=measures, 2222 rows=rows, 2223 after=after, 2224 pattern=pattern, 2225 define=define, 2226 alias=self._parse_table_alias(), 2227 ) 2228 2229 def _parse_lateral(self) -> t.Optional[exp.Lateral]: 2230 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY) 2231 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY) 2232 2233 if outer_apply or cross_apply: 2234 this = self._parse_select(table=True) 2235 view = None 2236 outer = not cross_apply 2237 elif self._match(TokenType.LATERAL): 2238 this = self._parse_select(table=True) 2239 view = self._match(TokenType.VIEW) 2240 outer = self._match(TokenType.OUTER) 2241 else: 2242 return None 2243 2244 if not this: 2245 this = ( 2246 self._parse_unnest() 2247 or self._parse_function() 2248 or self._parse_id_var(any_token=False) 2249 ) 2250 2251 while self._match(TokenType.DOT): 2252 this = exp.Dot( 2253 this=this, 2254 expression=self._parse_function() or self._parse_id_var(any_token=False), 2255 ) 2256 2257 if view: 2258 table = self._parse_id_var(any_token=False) 2259 columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else [] 2260 table_alias: t.Optional[exp.TableAlias] = self.expression( 2261 exp.TableAlias, this=table, columns=columns 2262 ) 2263 elif isinstance(this, exp.Subquery) and this.alias: 2264 # Ensures parity between the Subquery's and the Lateral's "alias" args 2265 table_alias = this.args["alias"].copy() 2266 else: 2267 table_alias = self._parse_table_alias() 2268 2269 return self.expression(exp.Lateral, this=this, view=view, outer=outer, alias=table_alias) 2270 2271 def _parse_join_parts( 2272 self, 2273 ) -> 
t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 2274 return ( 2275 self._match_set(self.JOIN_METHODS) and self._prev, 2276 self._match_set(self.JOIN_SIDES) and self._prev, 2277 self._match_set(self.JOIN_KINDS) and self._prev, 2278 ) 2279 2280 def _parse_join( 2281 self, skip_join_token: bool = False, parse_bracket: bool = False 2282 ) -> t.Optional[exp.Join]: 2283 if self._match(TokenType.COMMA): 2284 return self.expression(exp.Join, this=self._parse_table()) 2285 2286 index = self._index 2287 method, side, kind = self._parse_join_parts() 2288 hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None 2289 join = self._match(TokenType.JOIN) 2290 2291 if not skip_join_token and not join: 2292 self._retreat(index) 2293 kind = None 2294 method = None 2295 side = None 2296 2297 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False) 2298 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False) 2299 2300 if not skip_join_token and not join and not outer_apply and not cross_apply: 2301 return None 2302 2303 if outer_apply: 2304 side = Token(TokenType.LEFT, "LEFT") 2305 2306 kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)} 2307 2308 if method: 2309 kwargs["method"] = method.text 2310 if side: 2311 kwargs["side"] = side.text 2312 if kind: 2313 kwargs["kind"] = kind.text 2314 if hint: 2315 kwargs["hint"] = hint 2316 2317 if self._match(TokenType.ON): 2318 kwargs["on"] = self._parse_conjunction() 2319 elif self._match(TokenType.USING): 2320 kwargs["using"] = self._parse_wrapped_id_vars() 2321 elif not (kind and kind.token_type == TokenType.CROSS): 2322 index = self._index 2323 joins = self._parse_joins() 2324 2325 if joins and self._match(TokenType.ON): 2326 kwargs["on"] = self._parse_conjunction() 2327 elif joins and self._match(TokenType.USING): 2328 kwargs["using"] = self._parse_wrapped_id_vars() 2329 else: 2330 joins = None 2331 self._retreat(index) 2332 2333 kwargs["this"].set("joins", joins) 2334 2335 return self.expression(exp.Join, **kwargs) 2336 2337 def _parse_index( 2338 self, 2339 index: t.Optional[exp.Expression] = None, 2340 ) -> t.Optional[exp.Index]: 2341 if index: 2342 unique = None 2343 primary = None 2344 amp = None 2345 2346 self._match(TokenType.ON) 2347 self._match(TokenType.TABLE) # hive 2348 table = self._parse_table_parts(schema=True) 2349 else: 2350 unique = self._match(TokenType.UNIQUE) 2351 primary = self._match_text_seq("PRIMARY") 2352 amp = self._match_text_seq("AMP") 2353 2354 if not self._match(TokenType.INDEX): 2355 return None 2356 2357 index = self._parse_id_var() 2358 table = None 2359 2360 using = self._parse_field() if self._match(TokenType.USING) else None 2361 2362 if self._match(TokenType.L_PAREN, advance=False): 2363 columns = self._parse_wrapped_csv(self._parse_ordered) 2364 else: 2365 columns = None 2366 2367 return self.expression( 2368 exp.Index, 2369 this=index, 2370 table=table, 2371 using=using, 2372 columns=columns, 2373 unique=unique, 2374 primary=primary, 2375 amp=amp, 2376 partition_by=self._parse_partition_by(), 2377 ) 2378 2379 def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]: 2380 hints: t.List[exp.Expression] = [] 2381 if self._match_pair(TokenType.WITH, TokenType.L_PAREN): 2382 # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16 2383 hints.append( 2384 self.expression( 2385 exp.WithTableHint, 2386 expressions=self._parse_csv( 2387 lambda: self._parse_function() or 
self._parse_var(any_token=True) 2388 ), 2389 ) 2390 ) 2391 self._match_r_paren() 2392 else: 2393 # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html 2394 while self._match_set(self.TABLE_INDEX_HINT_TOKENS): 2395 hint = exp.IndexTableHint(this=self._prev.text.upper()) 2396 2397 self._match_texts({"INDEX", "KEY"}) 2398 if self._match(TokenType.FOR): 2399 hint.set("target", self._advance_any() and self._prev.text.upper()) 2400 2401 hint.set("expressions", self._parse_wrapped_id_vars()) 2402 hints.append(hint) 2403 2404 return hints or None 2405 2406 def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]: 2407 return ( 2408 (not schema and self._parse_function(optional_parens=False)) 2409 or self._parse_id_var(any_token=False) 2410 or self._parse_string_as_identifier() 2411 or self._parse_placeholder() 2412 ) 2413 2414 def _parse_table_parts(self, schema: bool = False) -> exp.Table: 2415 catalog = None 2416 db = None 2417 table = self._parse_table_part(schema=schema) 2418 2419 while self._match(TokenType.DOT): 2420 if catalog: 2421 # This allows nesting the table in arbitrarily many dot expressions if needed 2422 table = self.expression( 2423 exp.Dot, this=table, expression=self._parse_table_part(schema=schema) 2424 ) 2425 else: 2426 catalog = db 2427 db = table 2428 table = self._parse_table_part(schema=schema) 2429 2430 if not table: 2431 self.raise_error(f"Expected table name but got {self._curr}") 2432 2433 return self.expression( 2434 exp.Table, this=table, db=db, catalog=catalog, pivots=self._parse_pivots() 2435 ) 2436 2437 def _parse_table( 2438 self, 2439 schema: bool = False, 2440 joins: bool = False, 2441 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 2442 parse_bracket: bool = False, 2443 ) -> t.Optional[exp.Expression]: 2444 lateral = self._parse_lateral() 2445 if lateral: 2446 return lateral 2447 2448 unnest = self._parse_unnest() 2449 if unnest: 2450 return unnest 2451 2452 values = self._parse_derived_table_values() 2453 if values: 2454 return values 2455 2456 subquery = self._parse_select(table=True) 2457 if subquery: 2458 if not subquery.args.get("pivots"): 2459 subquery.set("pivots", self._parse_pivots()) 2460 return subquery 2461 2462 bracket = parse_bracket and self._parse_bracket(None) 2463 bracket = self.expression(exp.Table, this=bracket) if bracket else None 2464 this: exp.Expression = bracket or self._parse_table_parts(schema=schema) 2465 2466 if schema: 2467 return self._parse_schema(this=this) 2468 2469 if self.ALIAS_POST_TABLESAMPLE: 2470 table_sample = self._parse_table_sample() 2471 2472 alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 2473 if alias: 2474 this.set("alias", alias) 2475 2476 if not this.args.get("pivots"): 2477 this.set("pivots", self._parse_pivots()) 2478 2479 this.set("hints", self._parse_table_hints()) 2480 2481 if not self.ALIAS_POST_TABLESAMPLE: 2482 table_sample = self._parse_table_sample() 2483 2484 if table_sample: 2485 table_sample.set("this", this) 2486 this = table_sample 2487 2488 if joins: 2489 for join in iter(self._parse_join, None): 2490 this.append("joins", join) 2491 2492 return this 2493 2494 def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]: 2495 if not self._match(TokenType.UNNEST): 2496 return None 2497 2498 expressions = self._parse_wrapped_csv(self._parse_type) 2499 ordinality = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 2500 2501 alias = self._parse_table_alias() if with_alias else None 2502 2503 if alias and 
self.UNNEST_COLUMN_ONLY: 2504 if alias.args.get("columns"): 2505 self.raise_error("Unexpected extra column alias in unnest.") 2506 2507 alias.set("columns", [alias.this]) 2508 alias.set("this", None) 2509 2510 offset = None 2511 if self._match_pair(TokenType.WITH, TokenType.OFFSET): 2512 self._match(TokenType.ALIAS) 2513 offset = self._parse_id_var() or exp.to_identifier("offset") 2514 2515 return self.expression( 2516 exp.Unnest, expressions=expressions, ordinality=ordinality, alias=alias, offset=offset 2517 ) 2518 2519 def _parse_derived_table_values(self) -> t.Optional[exp.Values]: 2520 is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES) 2521 if not is_derived and not self._match(TokenType.VALUES): 2522 return None 2523 2524 expressions = self._parse_csv(self._parse_value) 2525 alias = self._parse_table_alias() 2526 2527 if is_derived: 2528 self._match_r_paren() 2529 2530 return self.expression( 2531 exp.Values, expressions=expressions, alias=alias or self._parse_table_alias() 2532 ) 2533 2534 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 2535 if not self._match(TokenType.TABLE_SAMPLE) and not ( 2536 as_modifier and self._match_text_seq("USING", "SAMPLE") 2537 ): 2538 return None 2539 2540 bucket_numerator = None 2541 bucket_denominator = None 2542 bucket_field = None 2543 percent = None 2544 rows = None 2545 size = None 2546 seed = None 2547 2548 kind = ( 2549 self._prev.text if self._prev.token_type == TokenType.TABLE_SAMPLE else "USING SAMPLE" 2550 ) 2551 method = self._parse_var(tokens=(TokenType.ROW,)) 2552 2553 self._match(TokenType.L_PAREN) 2554 2555 num = self._parse_number() 2556 2557 if self._match_text_seq("BUCKET"): 2558 bucket_numerator = self._parse_number() 2559 self._match_text_seq("OUT", "OF") 2560 bucket_denominator = self._parse_number() 2561 self._match(TokenType.ON) 2562 bucket_field = self._parse_field() 2563 elif self._match_set((TokenType.PERCENT, TokenType.MOD)): 2564 percent = num 2565 elif self._match(TokenType.ROWS): 2566 rows = num 2567 else: 2568 size = num 2569 2570 self._match(TokenType.R_PAREN) 2571 2572 if self._match(TokenType.L_PAREN): 2573 method = self._parse_var() 2574 seed = self._match(TokenType.COMMA) and self._parse_number() 2575 self._match_r_paren() 2576 elif self._match_texts(("SEED", "REPEATABLE")): 2577 seed = self._parse_wrapped(self._parse_number) 2578 2579 return self.expression( 2580 exp.TableSample, 2581 method=method, 2582 bucket_numerator=bucket_numerator, 2583 bucket_denominator=bucket_denominator, 2584 bucket_field=bucket_field, 2585 percent=percent, 2586 rows=rows, 2587 size=size, 2588 seed=seed, 2589 kind=kind, 2590 ) 2591 2592 def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]: 2593 return list(iter(self._parse_pivot, None)) or None 2594 2595 def _parse_joins(self) -> t.Optional[t.List[exp.Join]]: 2596 return list(iter(self._parse_join, None)) or None 2597 2598 # https://duckdb.org/docs/sql/statements/pivot 2599 def _parse_simplified_pivot(self) -> exp.Pivot: 2600 def _parse_on() -> t.Optional[exp.Expression]: 2601 this = self._parse_bitwise() 2602 return self._parse_in(this) if self._match(TokenType.IN) else this 2603 2604 this = self._parse_table() 2605 expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on) 2606 using = self._match(TokenType.USING) and self._parse_csv( 2607 lambda: self._parse_alias(self._parse_function()) 2608 ) 2609 group = self._parse_group() 2610 return self.expression( 2611 exp.Pivot, this=this,
expressions=expressions, using=using, group=group 2612 ) 2613 2614 def _parse_pivot(self) -> t.Optional[exp.Pivot]: 2615 index = self._index 2616 2617 if self._match(TokenType.PIVOT): 2618 unpivot = False 2619 elif self._match(TokenType.UNPIVOT): 2620 unpivot = True 2621 else: 2622 return None 2623 2624 expressions = [] 2625 field = None 2626 2627 if not self._match(TokenType.L_PAREN): 2628 self._retreat(index) 2629 return None 2630 2631 if unpivot: 2632 expressions = self._parse_csv(self._parse_column) 2633 else: 2634 expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function())) 2635 2636 if not expressions: 2637 self.raise_error("Failed to parse PIVOT's aggregation list") 2638 2639 if not self._match(TokenType.FOR): 2640 self.raise_error("Expecting FOR") 2641 2642 value = self._parse_column() 2643 2644 if not self._match(TokenType.IN): 2645 self.raise_error("Expecting IN") 2646 2647 field = self._parse_in(value, alias=True) 2648 2649 self._match_r_paren() 2650 2651 pivot = self.expression(exp.Pivot, expressions=expressions, field=field, unpivot=unpivot) 2652 2653 if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False): 2654 pivot.set("alias", self._parse_table_alias()) 2655 2656 if not unpivot: 2657 names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions)) 2658 2659 columns: t.List[exp.Expression] = [] 2660 for fld in pivot.args["field"].expressions: 2661 field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name 2662 for name in names: 2663 if self.PREFIXED_PIVOT_COLUMNS: 2664 name = f"{name}_{field_name}" if name else field_name 2665 else: 2666 name = f"{field_name}_{name}" if name else field_name 2667 2668 columns.append(exp.to_identifier(name)) 2669 2670 pivot.set("columns", columns) 2671 2672 return pivot 2673 2674 def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]: 2675 return [agg.alias for agg in aggregations] 2676 2677 def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]: 2678 if not skip_where_token and not self._match(TokenType.WHERE): 2679 return None 2680 2681 return self.expression( 2682 exp.Where, comments=self._prev_comments, this=self._parse_conjunction() 2683 ) 2684 2685 def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]: 2686 if not skip_group_by_token and not self._match(TokenType.GROUP_BY): 2687 return None 2688 2689 elements = defaultdict(list) 2690 2691 if self._match(TokenType.ALL): 2692 return self.expression(exp.Group, all=True) 2693 2694 while True: 2695 expressions = self._parse_csv(self._parse_conjunction) 2696 if expressions: 2697 elements["expressions"].extend(expressions) 2698 2699 grouping_sets = self._parse_grouping_sets() 2700 if grouping_sets: 2701 elements["grouping_sets"].extend(grouping_sets) 2702 2703 rollup = None 2704 cube = None 2705 totals = None 2706 2707 with_ = self._match(TokenType.WITH) 2708 if self._match(TokenType.ROLLUP): 2709 rollup = with_ or self._parse_wrapped_csv(self._parse_column) 2710 elements["rollup"].extend(ensure_list(rollup)) 2711 2712 if self._match(TokenType.CUBE): 2713 cube = with_ or self._parse_wrapped_csv(self._parse_column) 2714 elements["cube"].extend(ensure_list(cube)) 2715 2716 if self._match_text_seq("TOTALS"): 2717 totals = True 2718 elements["totals"] = True # type: ignore 2719 2720 if not (grouping_sets or rollup or cube or totals): 2721 break 2722 2723 return self.expression(exp.Group, **elements) # type: ignore 2724 2725 def 
_parse_grouping_sets(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]: 2726 if not self._match(TokenType.GROUPING_SETS): 2727 return None 2728 2729 return self._parse_wrapped_csv(self._parse_grouping_set) 2730 2731 def _parse_grouping_set(self) -> t.Optional[exp.Expression]: 2732 if self._match(TokenType.L_PAREN): 2733 grouping_set = self._parse_csv(self._parse_column) 2734 self._match_r_paren() 2735 return self.expression(exp.Tuple, expressions=grouping_set) 2736 2737 return self._parse_column() 2738 2739 def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]: 2740 if not skip_having_token and not self._match(TokenType.HAVING): 2741 return None 2742 return self.expression(exp.Having, this=self._parse_conjunction()) 2743 2744 def _parse_qualify(self) -> t.Optional[exp.Qualify]: 2745 if not self._match(TokenType.QUALIFY): 2746 return None 2747 return self.expression(exp.Qualify, this=self._parse_conjunction()) 2748 2749 def _parse_order( 2750 self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False 2751 ) -> t.Optional[exp.Expression]: 2752 if not skip_order_token and not self._match(TokenType.ORDER_BY): 2753 return this 2754 2755 return self.expression( 2756 exp.Order, this=this, expressions=self._parse_csv(self._parse_ordered) 2757 ) 2758 2759 def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]: 2760 if not self._match(token): 2761 return None 2762 return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered)) 2763 2764 def _parse_ordered(self) -> exp.Ordered: 2765 this = self._parse_conjunction() 2766 self._match(TokenType.ASC) 2767 2768 is_desc = self._match(TokenType.DESC) 2769 is_nulls_first = self._match_text_seq("NULLS", "FIRST") 2770 is_nulls_last = self._match_text_seq("NULLS", "LAST") 2771 desc = is_desc or False 2772 asc = not desc 2773 nulls_first = is_nulls_first or False 2774 explicitly_null_ordered = is_nulls_first or is_nulls_last 2775 2776 if ( 2777 not explicitly_null_ordered 2778 and ( 2779 (asc and self.NULL_ORDERING == "nulls_are_small") 2780 or (desc and self.NULL_ORDERING != "nulls_are_small") 2781 ) 2782 and self.NULL_ORDERING != "nulls_are_last" 2783 ): 2784 nulls_first = True 2785 2786 return self.expression(exp.Ordered, this=this, desc=desc, nulls_first=nulls_first) 2787 2788 def _parse_limit( 2789 self, this: t.Optional[exp.Expression] = None, top: bool = False 2790 ) -> t.Optional[exp.Expression]: 2791 if self._match(TokenType.TOP if top else TokenType.LIMIT): 2792 comments = self._prev_comments 2793 if top: 2794 limit_paren = self._match(TokenType.L_PAREN) 2795 expression = self._parse_number() 2796 2797 if limit_paren: 2798 self._match_r_paren() 2799 else: 2800 expression = self._parse_term() 2801 2802 if self._match(TokenType.COMMA): 2803 offset = expression 2804 expression = self._parse_term() 2805 else: 2806 offset = None 2807 2808 limit_exp = self.expression( 2809 exp.Limit, this=this, expression=expression, offset=offset, comments=comments 2810 ) 2811 2812 return limit_exp 2813 2814 if self._match(TokenType.FETCH): 2815 direction = self._match_set((TokenType.FIRST, TokenType.NEXT)) 2816 direction = self._prev.text if direction else "FIRST" 2817 2818 count = self._parse_number() 2819 percent = self._match(TokenType.PERCENT) 2820 2821 self._match_set((TokenType.ROW, TokenType.ROWS)) 2822 2823 only = self._match_text_seq("ONLY") 2824 with_ties = self._match_text_seq("WITH", "TIES") 2825 2826 if only and with_ties: 2827 self.raise_error("Cannot specify 
both ONLY and WITH TIES in FETCH clause") 2828 2829 return self.expression( 2830 exp.Fetch, 2831 direction=direction, 2832 count=count, 2833 percent=percent, 2834 with_ties=with_ties, 2835 ) 2836 2837 return this 2838 2839 def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 2840 if not self._match(TokenType.OFFSET): 2841 return this 2842 2843 count = self._parse_term() 2844 self._match_set((TokenType.ROW, TokenType.ROWS)) 2845 return self.expression(exp.Offset, this=this, expression=count) 2846 2847 def _parse_locks(self) -> t.List[exp.Lock]: 2848 locks = [] 2849 while True: 2850 if self._match_text_seq("FOR", "UPDATE"): 2851 update = True 2852 elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq( 2853 "LOCK", "IN", "SHARE", "MODE" 2854 ): 2855 update = False 2856 else: 2857 break 2858 2859 expressions = None 2860 if self._match_text_seq("OF"): 2861 expressions = self._parse_csv(lambda: self._parse_table(schema=True)) 2862 2863 wait: t.Optional[bool | exp.Expression] = None 2864 if self._match_text_seq("NOWAIT"): 2865 wait = True 2866 elif self._match_text_seq("WAIT"): 2867 wait = self._parse_primary() 2868 elif self._match_text_seq("SKIP", "LOCKED"): 2869 wait = False 2870 2871 locks.append( 2872 self.expression(exp.Lock, update=update, expressions=expressions, wait=wait) 2873 ) 2874 2875 return locks 2876 2877 def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 2878 if not self._match_set(self.SET_OPERATIONS): 2879 return this 2880 2881 token_type = self._prev.token_type 2882 2883 if token_type == TokenType.UNION: 2884 expression = exp.Union 2885 elif token_type == TokenType.EXCEPT: 2886 expression = exp.Except 2887 else: 2888 expression = exp.Intersect 2889 2890 return self.expression( 2891 expression, 2892 this=this, 2893 distinct=self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL), 2894 expression=self._parse_set_operations(self._parse_select(nested=True)), 2895 ) 2896 2897 def _parse_expression(self) -> t.Optional[exp.Expression]: 2898 return self._parse_alias(self._parse_conjunction()) 2899 2900 def _parse_conjunction(self) -> t.Optional[exp.Expression]: 2901 return self._parse_tokens(self._parse_equality, self.CONJUNCTION) 2902 2903 def _parse_equality(self) -> t.Optional[exp.Expression]: 2904 return self._parse_tokens(self._parse_comparison, self.EQUALITY) 2905 2906 def _parse_comparison(self) -> t.Optional[exp.Expression]: 2907 return self._parse_tokens(self._parse_range, self.COMPARISON) 2908 2909 def _parse_range(self) -> t.Optional[exp.Expression]: 2910 this = self._parse_bitwise() 2911 negate = self._match(TokenType.NOT) 2912 2913 if self._match_set(self.RANGE_PARSERS): 2914 expression = self.RANGE_PARSERS[self._prev.token_type](self, this) 2915 if not expression: 2916 return this 2917 2918 this = expression 2919 elif self._match(TokenType.ISNULL): 2920 this = self.expression(exp.Is, this=this, expression=exp.Null()) 2921 2922 # Postgres supports ISNULL and NOTNULL for conditions. 
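# "x ISNULL" has just been rewritten to "x IS NULL" above; the branch below
# likewise rewrites "x NOTNULL" to "NOT (x IS NULL)", so later stages only
# ever see the standard IS [NOT] NULL form (note added for exposition; the
# reference follows).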
2923 # https://blog.andreiavram.ro/postgresql-null-composite-type/ 2924 if self._match(TokenType.NOTNULL): 2925 this = self.expression(exp.Is, this=this, expression=exp.Null()) 2926 this = self.expression(exp.Not, this=this) 2927 2928 if negate: 2929 this = self.expression(exp.Not, this=this) 2930 2931 if self._match(TokenType.IS): 2932 this = self._parse_is(this) 2933 2934 return this 2935 2936 def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 2937 index = self._index - 1 2938 negate = self._match(TokenType.NOT) 2939 2940 if self._match_text_seq("DISTINCT", "FROM"): 2941 klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ 2942 return self.expression(klass, this=this, expression=self._parse_expression()) 2943 2944 expression = self._parse_null() or self._parse_boolean() 2945 if not expression: 2946 self._retreat(index) 2947 return None 2948 2949 this = self.expression(exp.Is, this=this, expression=expression) 2950 return self.expression(exp.Not, this=this) if negate else this 2951 2952 def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In: 2953 unnest = self._parse_unnest(with_alias=False) 2954 if unnest: 2955 this = self.expression(exp.In, this=this, unnest=unnest) 2956 elif self._match(TokenType.L_PAREN): 2957 expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias)) 2958 2959 if len(expressions) == 1 and isinstance(expressions[0], exp.Subqueryable): 2960 this = self.expression(exp.In, this=this, query=expressions[0]) 2961 else: 2962 this = self.expression(exp.In, this=this, expressions=expressions) 2963 2964 self._match_r_paren(this) 2965 else: 2966 this = self.expression(exp.In, this=this, field=self._parse_field()) 2967 2968 return this 2969 2970 def _parse_between(self, this: exp.Expression) -> exp.Between: 2971 low = self._parse_bitwise() 2972 self._match(TokenType.AND) 2973 high = self._parse_bitwise() 2974 return self.expression(exp.Between, this=this, low=low, high=high) 2975 2976 def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 2977 if not self._match(TokenType.ESCAPE): 2978 return this 2979 return self.expression(exp.Escape, this=this, expression=self._parse_string()) 2980 2981 def _parse_interval(self) -> t.Optional[exp.Interval]: 2982 if not self._match(TokenType.INTERVAL): 2983 return None 2984 2985 if self._match(TokenType.STRING, advance=False): 2986 this = self._parse_primary() 2987 else: 2988 this = self._parse_term() 2989 2990 unit = self._parse_function() or self._parse_var() 2991 2992 # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse 2993 # each INTERVAL expression into this canonical form so it's easy to transpile 2994 if this and this.is_number: 2995 this = exp.Literal.string(this.name) 2996 elif this and this.is_string: 2997 parts = this.name.split() 2998 2999 if len(parts) == 2: 3000 if unit: 3001 # this is not actually a unit, it's something else 3002 unit = None 3003 self._retreat(self._index - 1) 3004 else: 3005 this = exp.Literal.string(parts[0]) 3006 unit = self.expression(exp.Var, this=parts[1]) 3007 3008 return self.expression(exp.Interval, this=this, unit=unit) 3009 3010 def _parse_bitwise(self) -> t.Optional[exp.Expression]: 3011 this = self._parse_term() 3012 3013 while True: 3014 if self._match_set(self.BITWISE): 3015 this = self.expression( 3016 self.BITWISE[self._prev.token_type], this=this, expression=self._parse_term() 3017 ) 3018 elif self._match_pair(TokenType.LT, TokenType.LT): 3019 
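# "<<" and ">>" arrive from the tokenizer as two consecutive LT / GT tokens,
# so the shift operators are recognized here by pairing them with
# _match_pair rather than via dedicated token types (comment added for
# exposition, not part of the original source).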
this = self.expression( 3020 exp.BitwiseLeftShift, this=this, expression=self._parse_term() 3021 ) 3022 elif self._match_pair(TokenType.GT, TokenType.GT): 3023 this = self.expression( 3024 exp.BitwiseRightShift, this=this, expression=self._parse_term() 3025 ) 3026 else: 3027 break 3028 3029 return this 3030 3031 def _parse_term(self) -> t.Optional[exp.Expression]: 3032 return self._parse_tokens(self._parse_factor, self.TERM) 3033 3034 def _parse_factor(self) -> t.Optional[exp.Expression]: 3035 return self._parse_tokens(self._parse_unary, self.FACTOR) 3036 3037 def _parse_unary(self) -> t.Optional[exp.Expression]: 3038 if self._match_set(self.UNARY_PARSERS): 3039 return self.UNARY_PARSERS[self._prev.token_type](self) 3040 return self._parse_at_time_zone(self._parse_type()) 3041 3042 def _parse_type(self) -> t.Optional[exp.Expression]: 3043 interval = self._parse_interval() 3044 if interval: 3045 return interval 3046 3047 index = self._index 3048 data_type = self._parse_types(check_func=True) 3049 this = self._parse_column() 3050 3051 if data_type: 3052 if isinstance(this, exp.Literal): 3053 parser = self.TYPE_LITERAL_PARSERS.get(data_type.this) 3054 if parser: 3055 return parser(self, this, data_type) 3056 return self.expression(exp.Cast, this=this, to=data_type) 3057 if not data_type.expressions: 3058 self._retreat(index) 3059 return self._parse_column() 3060 return self._parse_column_ops(data_type) 3061 3062 return this 3063 3064 def _parse_type_size(self) -> t.Optional[exp.DataTypeSize]: 3065 this = self._parse_type() 3066 if not this: 3067 return None 3068 3069 return self.expression( 3070 exp.DataTypeSize, this=this, expression=self._parse_var(any_token=True) 3071 ) 3072 3073 def _parse_types( 3074 self, check_func: bool = False, schema: bool = False 3075 ) -> t.Optional[exp.Expression]: 3076 index = self._index 3077 3078 prefix = self._match_text_seq("SYSUDTLIB", ".") 3079 3080 if not self._match_set(self.TYPE_TOKENS): 3081 return None 3082 3083 type_token = self._prev.token_type 3084 3085 if type_token == TokenType.PSEUDO_TYPE: 3086 return self.expression(exp.PseudoType, this=self._prev.text) 3087 3088 nested = type_token in self.NESTED_TYPE_TOKENS 3089 is_struct = type_token == TokenType.STRUCT 3090 expressions = None 3091 maybe_func = False 3092 3093 if self._match(TokenType.L_PAREN): 3094 if is_struct: 3095 expressions = self._parse_csv(self._parse_struct_types) 3096 elif nested: 3097 expressions = self._parse_csv( 3098 lambda: self._parse_types(check_func=check_func, schema=schema) 3099 ) 3100 elif type_token in self.ENUM_TYPE_TOKENS: 3101 expressions = self._parse_csv(self._parse_primary) 3102 else: 3103 expressions = self._parse_csv(self._parse_type_size) 3104 3105 if not expressions or not self._match(TokenType.R_PAREN): 3106 self._retreat(index) 3107 return None 3108 3109 maybe_func = True 3110 3111 if self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET): 3112 this = exp.DataType( 3113 this=exp.DataType.Type.ARRAY, 3114 expressions=[ 3115 exp.DataType( 3116 this=exp.DataType.Type[type_token.value], 3117 expressions=expressions, 3118 nested=nested, 3119 ) 3120 ], 3121 nested=True, 3122 ) 3123 3124 while self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET): 3125 this = exp.DataType(this=exp.DataType.Type.ARRAY, expressions=[this], nested=True) 3126 3127 return this 3128 3129 if self._match(TokenType.L_BRACKET): 3130 self._retreat(index) 3131 return None 3132 3133 values: t.Optional[t.List[t.Optional[exp.Expression]]] = None 3134 if nested and 
self._match(TokenType.LT): 3135 if is_struct: 3136 expressions = self._parse_csv(self._parse_struct_types) 3137 else: 3138 expressions = self._parse_csv( 3139 lambda: self._parse_types(check_func=check_func, schema=schema) 3140 ) 3141 3142 if not self._match(TokenType.GT): 3143 self.raise_error("Expecting >") 3144 3145 if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)): 3146 values = self._parse_csv(self._parse_conjunction) 3147 self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN)) 3148 3149 value: t.Optional[exp.Expression] = None 3150 if type_token in self.TIMESTAMPS: 3151 if self._match_text_seq("WITH", "TIME", "ZONE"): 3152 maybe_func = False 3153 value = exp.DataType(this=exp.DataType.Type.TIMESTAMPTZ, expressions=expressions) 3154 elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"): 3155 maybe_func = False 3156 value = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions) 3157 elif self._match_text_seq("WITHOUT", "TIME", "ZONE"): 3158 maybe_func = False 3159 elif type_token == TokenType.INTERVAL: 3160 unit = self._parse_var() 3161 3162 if not unit: 3163 value = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL) 3164 else: 3165 value = self.expression(exp.Interval, unit=unit) 3166 3167 if maybe_func and check_func: 3168 index2 = self._index 3169 peek = self._parse_string() 3170 3171 if not peek: 3172 self._retreat(index) 3173 return None 3174 3175 self._retreat(index2) 3176 3177 if value: 3178 return value 3179 3180 return exp.DataType( 3181 this=exp.DataType.Type[type_token.value], 3182 expressions=expressions, 3183 nested=nested, 3184 values=values, 3185 prefix=prefix, 3186 ) 3187 3188 def _parse_struct_types(self) -> t.Optional[exp.Expression]: 3189 this = self._parse_type() or self._parse_id_var() 3190 self._match(TokenType.COLON) 3191 return self._parse_column_def(this) 3192 3193 def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3194 if not self._match_text_seq("AT", "TIME", "ZONE"): 3195 return this 3196 return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary()) 3197 3198 def _parse_column(self) -> t.Optional[exp.Expression]: 3199 this = self._parse_field() 3200 if isinstance(this, exp.Identifier): 3201 this = self.expression(exp.Column, this=this) 3202 elif not this: 3203 return self._parse_bracket(this) 3204 return self._parse_column_ops(this) 3205 3206 def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3207 this = self._parse_bracket(this) 3208 3209 while self._match_set(self.COLUMN_OPERATORS): 3210 op_token = self._prev.token_type 3211 op = self.COLUMN_OPERATORS.get(op_token) 3212 3213 if op_token == TokenType.DCOLON: 3214 field = self._parse_types() 3215 if not field: 3216 self.raise_error("Expected type") 3217 elif op and self._curr: 3218 self._advance() 3219 value = self._prev.text 3220 field = ( 3221 exp.Literal.number(value) 3222 if self._prev.token_type == TokenType.NUMBER 3223 else exp.Literal.string(value) 3224 ) 3225 else: 3226 field = self._parse_field(anonymous_func=True, any_token=True) 3227 3228 if isinstance(field, exp.Func): 3229 # bigquery allows function calls like x.y.count(...) 3230 # SAFE.SUBSTR(...) 
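# When the trailing field of a dotted path such as x.y.count(...) turns out
# to be a function call, the exp.Column accumulated so far is folded into a
# chain of exp.Dot nodes by _replace_columns_with_dots below (note added for
# exposition; the reference follows).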
3231 # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules 3232 this = self._replace_columns_with_dots(this) 3233 3234 if op: 3235 this = op(self, this, field) 3236 elif isinstance(this, exp.Column) and not this.args.get("catalog"): 3237 this = self.expression( 3238 exp.Column, 3239 this=field, 3240 table=this.this, 3241 db=this.args.get("table"), 3242 catalog=this.args.get("db"), 3243 ) 3244 else: 3245 this = self.expression(exp.Dot, this=this, expression=field) 3246 this = self._parse_bracket(this) 3247 return this 3248 3249 def _parse_primary(self) -> t.Optional[exp.Expression]: 3250 if self._match_set(self.PRIMARY_PARSERS): 3251 token_type = self._prev.token_type 3252 primary = self.PRIMARY_PARSERS[token_type](self, self._prev) 3253 3254 if token_type == TokenType.STRING: 3255 expressions = [primary] 3256 while self._match(TokenType.STRING): 3257 expressions.append(exp.Literal.string(self._prev.text)) 3258 3259 if len(expressions) > 1: 3260 return self.expression(exp.Concat, expressions=expressions) 3261 3262 return primary 3263 3264 if self._match_pair(TokenType.DOT, TokenType.NUMBER): 3265 return exp.Literal.number(f"0.{self._prev.text}") 3266 3267 if self._match(TokenType.L_PAREN): 3268 comments = self._prev_comments 3269 query = self._parse_select() 3270 3271 if query: 3272 expressions = [query] 3273 else: 3274 expressions = self._parse_expressions() 3275 3276 this = self._parse_query_modifiers(seq_get(expressions, 0)) 3277 3278 if isinstance(this, exp.Subqueryable): 3279 this = self._parse_set_operations( 3280 self._parse_subquery(this=this, parse_alias=False) 3281 ) 3282 elif len(expressions) > 1: 3283 this = self.expression(exp.Tuple, expressions=expressions) 3284 else: 3285 this = self.expression(exp.Paren, this=self._parse_set_operations(this)) 3286 3287 if this: 3288 this.add_comments(comments) 3289 3290 self._match_r_paren(expression=this) 3291 return this 3292 3293 return None 3294 3295 def _parse_field( 3296 self, 3297 any_token: bool = False, 3298 tokens: t.Optional[t.Collection[TokenType]] = None, 3299 anonymous_func: bool = False, 3300 ) -> t.Optional[exp.Expression]: 3301 return ( 3302 self._parse_primary() 3303 or self._parse_function(anonymous=anonymous_func) 3304 or self._parse_id_var(any_token=any_token, tokens=tokens) 3305 ) 3306 3307 def _parse_function( 3308 self, 3309 functions: t.Optional[t.Dict[str, t.Callable]] = None, 3310 anonymous: bool = False, 3311 optional_parens: bool = True, 3312 ) -> t.Optional[exp.Expression]: 3313 if not self._curr: 3314 return None 3315 3316 token_type = self._curr.token_type 3317 3318 if optional_parens and self._match_set(self.NO_PAREN_FUNCTION_PARSERS): 3319 return self.NO_PAREN_FUNCTION_PARSERS[token_type](self) 3320 3321 if not self._next or self._next.token_type != TokenType.L_PAREN: 3322 if optional_parens and token_type in self.NO_PAREN_FUNCTIONS: 3323 self._advance() 3324 return self.expression(self.NO_PAREN_FUNCTIONS[token_type]) 3325 3326 return None 3327 3328 if token_type not in self.FUNC_TOKENS: 3329 return None 3330 3331 this = self._curr.text 3332 upper = this.upper() 3333 self._advance(2) 3334 3335 parser = self.FUNCTION_PARSERS.get(upper) 3336 3337 if parser and not anonymous: 3338 this = parser(self) 3339 else: 3340 subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type) 3341 3342 if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH): 3343 this = self.expression(subquery_predicate, this=self._parse_select()) 3344 
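# e.g. "EXISTS (SELECT 1 FROM t)": EXISTS maps to exp.Exists through
# SUBQUERY_PREDICATES and the inner SELECT becomes its "this" argument;
# ANY / SOME / ALL are handled the same way (comment added for exposition,
# not part of the original source).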
self._match_r_paren() 3345 return this 3346 3347 if functions is None: 3348 functions = self.FUNCTIONS 3349 3350 function = functions.get(upper) 3351 3352 alias = upper in self.FUNCTIONS_WITH_ALIASED_ARGS 3353 args = self._parse_csv(lambda: self._parse_lambda(alias=alias)) 3354 3355 if function and not anonymous: 3356 this = self.validate_expression(function(args), args) 3357 else: 3358 this = self.expression(exp.Anonymous, this=this, expressions=args) 3359 3360 self._match(TokenType.R_PAREN, expression=this) 3361 return self._parse_window(this) 3362 3363 def _parse_function_parameter(self) -> t.Optional[exp.Expression]: 3364 return self._parse_column_def(self._parse_id_var()) 3365 3366 def _parse_user_defined_function( 3367 self, kind: t.Optional[TokenType] = None 3368 ) -> t.Optional[exp.Expression]: 3369 this = self._parse_id_var() 3370 3371 while self._match(TokenType.DOT): 3372 this = self.expression(exp.Dot, this=this, expression=self._parse_id_var()) 3373 3374 if not self._match(TokenType.L_PAREN): 3375 return this 3376 3377 expressions = self._parse_csv(self._parse_function_parameter) 3378 self._match_r_paren() 3379 return self.expression( 3380 exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True 3381 ) 3382 3383 def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier: 3384 literal = self._parse_primary() 3385 if literal: 3386 return self.expression(exp.Introducer, this=token.text, expression=literal) 3387 3388 return self.expression(exp.Identifier, this=token.text) 3389 3390 def _parse_session_parameter(self) -> exp.SessionParameter: 3391 kind = None 3392 this = self._parse_id_var() or self._parse_primary() 3393 3394 if this and self._match(TokenType.DOT): 3395 kind = this.name 3396 this = self._parse_var() or self._parse_primary() 3397 3398 return self.expression(exp.SessionParameter, this=this, kind=kind) 3399 3400 def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]: 3401 index = self._index 3402 3403 if self._match(TokenType.L_PAREN): 3404 expressions = self._parse_csv(self._parse_id_var) 3405 3406 if not self._match(TokenType.R_PAREN): 3407 self._retreat(index) 3408 else: 3409 expressions = [self._parse_id_var()] 3410 3411 if self._match_set(self.LAMBDAS): 3412 return self.LAMBDAS[self._prev.token_type](self, expressions) 3413 3414 self._retreat(index) 3415 3416 this: t.Optional[exp.Expression] 3417 3418 if self._match(TokenType.DISTINCT): 3419 this = self.expression( 3420 exp.Distinct, expressions=self._parse_csv(self._parse_conjunction) 3421 ) 3422 else: 3423 this = self._parse_select_or_expression(alias=alias) 3424 3425 return self._parse_limit(self._parse_order(self._parse_respect_or_ignore_nulls(this))) 3426 3427 def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 3428 index = self._index 3429 3430 if not self.errors: 3431 try: 3432 if self._parse_select(nested=True): 3433 return this 3434 except ParseError: 3435 pass 3436 finally: 3437 self.errors.clear() 3438 self._retreat(index) 3439 3440 if not self._match(TokenType.L_PAREN): 3441 return this 3442 3443 args = self._parse_csv( 3444 lambda: self._parse_constraint() 3445 or self._parse_column_def(self._parse_field(any_token=True)) 3446 ) 3447 3448 self._match_r_paren() 3449 return self.expression(exp.Schema, this=this, expressions=args) 3450 3451 def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3452 # column defs are not really columns, they're identifiers 3453 if 
isinstance(this, exp.Column): 3454 this = this.this 3455 3456 kind = self._parse_types(schema=True) 3457 3458 if self._match_text_seq("FOR", "ORDINALITY"): 3459 return self.expression(exp.ColumnDef, this=this, ordinality=True) 3460 3461 constraints = [] 3462 while True: 3463 constraint = self._parse_column_constraint() 3464 if not constraint: 3465 break 3466 constraints.append(constraint) 3467 3468 if not kind and not constraints: 3469 return this 3470 3471 return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints) 3472 3473 def _parse_auto_increment( 3474 self, 3475 ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint: 3476 start = None 3477 increment = None 3478 3479 if self._match(TokenType.L_PAREN, advance=False): 3480 args = self._parse_wrapped_csv(self._parse_bitwise) 3481 start = seq_get(args, 0) 3482 increment = seq_get(args, 1) 3483 elif self._match_text_seq("START"): 3484 start = self._parse_bitwise() 3485 self._match_text_seq("INCREMENT") 3486 increment = self._parse_bitwise() 3487 3488 if start and increment: 3489 return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment) 3490 3491 return exp.AutoIncrementColumnConstraint() 3492 3493 def _parse_compress(self) -> exp.CompressColumnConstraint: 3494 if self._match(TokenType.L_PAREN, advance=False): 3495 return self.expression( 3496 exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise) 3497 ) 3498 3499 return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise()) 3500 3501 def _parse_generated_as_identity(self) -> exp.GeneratedAsIdentityColumnConstraint: 3502 if self._match_text_seq("BY", "DEFAULT"): 3503 on_null = self._match_pair(TokenType.ON, TokenType.NULL) 3504 this = self.expression( 3505 exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null 3506 ) 3507 else: 3508 self._match_text_seq("ALWAYS") 3509 this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True) 3510 3511 self._match(TokenType.ALIAS) 3512 identity = self._match_text_seq("IDENTITY") 3513 3514 if self._match(TokenType.L_PAREN): 3515 if self._match_text_seq("START", "WITH"): 3516 this.set("start", self._parse_bitwise()) 3517 if self._match_text_seq("INCREMENT", "BY"): 3518 this.set("increment", self._parse_bitwise()) 3519 if self._match_text_seq("MINVALUE"): 3520 this.set("minvalue", self._parse_bitwise()) 3521 if self._match_text_seq("MAXVALUE"): 3522 this.set("maxvalue", self._parse_bitwise()) 3523 3524 if self._match_text_seq("CYCLE"): 3525 this.set("cycle", True) 3526 elif self._match_text_seq("NO", "CYCLE"): 3527 this.set("cycle", False) 3528 3529 if not identity: 3530 this.set("expression", self._parse_bitwise()) 3531 3532 self._match_r_paren() 3533 3534 return this 3535 3536 def _parse_inline(self) -> exp.InlineLengthColumnConstraint: 3537 self._match_text_seq("LENGTH") 3538 return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise()) 3539 3540 def _parse_not_constraint( 3541 self, 3542 ) -> t.Optional[exp.NotNullColumnConstraint | exp.CaseSpecificColumnConstraint]: 3543 if self._match_text_seq("NULL"): 3544 return self.expression(exp.NotNullColumnConstraint) 3545 if self._match_text_seq("CASESPECIFIC"): 3546 return self.expression(exp.CaseSpecificColumnConstraint, not_=True) 3547 return None 3548 3549 def _parse_column_constraint(self) -> t.Optional[exp.Expression]: 3550 if self._match(TokenType.CONSTRAINT): 3551 this = self._parse_id_var() 3552 else: 3553 this = None 3554 3555 if 

    def _parse_column_constraint(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.CONSTRAINT):
            this = self._parse_id_var()
        else:
            this = None

        if self._match_texts(self.CONSTRAINT_PARSERS):
            return self.expression(
                exp.ColumnConstraint,
                this=this,
                kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self),
            )

        return this

    def _parse_constraint(self) -> t.Optional[exp.Expression]:
        if not self._match(TokenType.CONSTRAINT):
            return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS)

        this = self._parse_id_var()
        expressions = []

        while True:
            constraint = self._parse_unnamed_constraint() or self._parse_function()
            if not constraint:
                break
            expressions.append(constraint)

        return self.expression(exp.Constraint, this=this, expressions=expressions)

    def _parse_unnamed_constraint(
        self, constraints: t.Optional[t.Collection[str]] = None
    ) -> t.Optional[exp.Expression]:
        if not self._match_texts(constraints or self.CONSTRAINT_PARSERS):
            return None

        constraint = self._prev.text.upper()
        if constraint not in self.CONSTRAINT_PARSERS:
            self.raise_error(f"No parser found for schema constraint {constraint}.")

        return self.CONSTRAINT_PARSERS[constraint](self)

    def _parse_unique(self) -> exp.UniqueColumnConstraint:
        self._match_text_seq("KEY")
        return self.expression(
            exp.UniqueColumnConstraint, this=self._parse_schema(self._parse_id_var(any_token=False))
        )

    def _parse_key_constraint_options(self) -> t.List[str]:
        options = []
        while True:
            if not self._curr:
                break

            if self._match(TokenType.ON):
                action = None
                on = self._advance_any() and self._prev.text

                if self._match_text_seq("NO", "ACTION"):
                    action = "NO ACTION"
                elif self._match_text_seq("CASCADE"):
                    action = "CASCADE"
                elif self._match_pair(TokenType.SET, TokenType.NULL):
                    action = "SET NULL"
                elif self._match_pair(TokenType.SET, TokenType.DEFAULT):
                    action = "SET DEFAULT"
                else:
                    self.raise_error("Invalid key constraint")

                options.append(f"ON {on} {action}")
            elif self._match_text_seq("NOT", "ENFORCED"):
                options.append("NOT ENFORCED")
            elif self._match_text_seq("DEFERRABLE"):
                options.append("DEFERRABLE")
            elif self._match_text_seq("INITIALLY", "DEFERRED"):
                options.append("INITIALLY DEFERRED")
            elif self._match_text_seq("NORELY"):
                options.append("NORELY")
            elif self._match_text_seq("MATCH", "FULL"):
                options.append("MATCH FULL")
            else:
                break

        return options

    def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]:
        if match and not self._match(TokenType.REFERENCES):
            return None

        expressions = None
        this = self._parse_table(schema=True)
        options = self._parse_key_constraint_options()
        return self.expression(exp.Reference, this=this, expressions=expressions, options=options)
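
    # Illustrative usage, not part of the original source: `_parse_references` and
    # `_parse_key_constraint_options` handle REFERENCES clauses and their trailing
    # options. A minimal sketch, assuming the public `sqlglot.parse_one` entry point:
    #
    #   >>> import sqlglot
    #   >>> ddl = "CREATE TABLE t (a INT REFERENCES p (id) ON DELETE CASCADE)"
    #   >>> ref = sqlglot.parse_one(ddl).find(sqlglot.exp.Reference)
    #   >>> ref.args.get("options")  # => ['ON DELETE CASCADE']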

    def _parse_foreign_key(self) -> exp.ForeignKey:
        expressions = self._parse_wrapped_id_vars()
        reference = self._parse_references()
        options = {}

        while self._match(TokenType.ON):
            if not self._match_set((TokenType.DELETE, TokenType.UPDATE)):
                self.raise_error("Expected DELETE or UPDATE")

            kind = self._prev.text.lower()

            if self._match_text_seq("NO", "ACTION"):
                action = "NO ACTION"
            elif self._match(TokenType.SET):
                self._match_set((TokenType.NULL, TokenType.DEFAULT))
                action = "SET " + self._prev.text.upper()
            else:
                self._advance()
                action = self._prev.text.upper()

            options[kind] = action

        return self.expression(
            exp.ForeignKey, expressions=expressions, reference=reference, **options  # type: ignore
        )

    def _parse_primary_key(
        self, wrapped_optional: bool = False, in_props: bool = False
    ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey:
        desc = (
            self._match_set((TokenType.ASC, TokenType.DESC))
            and self._prev.token_type == TokenType.DESC
        )

        if not in_props and not self._match(TokenType.L_PAREN, advance=False):
            return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc)

        expressions = self._parse_wrapped_csv(self._parse_field, optional=wrapped_optional)
        options = self._parse_key_constraint_options()
        return self.expression(exp.PrimaryKey, expressions=expressions, options=options)

    def _parse_bracket(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)):
            return this

        bracket_kind = self._prev.token_type

        if self._match(TokenType.COLON):
            expressions: t.List[t.Optional[exp.Expression]] = [
                self.expression(exp.Slice, expression=self._parse_conjunction())
            ]
        else:
            expressions = self._parse_csv(lambda: self._parse_slice(self._parse_conjunction()))

        # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs
        if bracket_kind == TokenType.L_BRACE:
            this = self.expression(exp.Struct, expressions=expressions)
        elif not this or this.name.upper() == "ARRAY":
            this = self.expression(exp.Array, expressions=expressions)
        else:
            expressions = apply_index_offset(this, expressions, -self.INDEX_OFFSET)
            this = self.expression(exp.Bracket, this=this, expressions=expressions)

        if not self._match(TokenType.R_BRACKET) and bracket_kind == TokenType.L_BRACKET:
            self.raise_error("Expected ]")
        elif not self._match(TokenType.R_BRACE) and bracket_kind == TokenType.L_BRACE:
            self.raise_error("Expected }")

        self._add_comments(this)
        return self._parse_bracket(this)

    def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if self._match(TokenType.COLON):
            return self.expression(exp.Slice, this=this, expression=self._parse_conjunction())
        return this

    def _parse_case(self) -> t.Optional[exp.Expression]:
        ifs = []
        default = None

        expression = self._parse_conjunction()

        while self._match(TokenType.WHEN):
            this = self._parse_conjunction()
            self._match(TokenType.THEN)
            then = self._parse_conjunction()
            ifs.append(self.expression(exp.If, this=this, true=then))

        if self._match(TokenType.ELSE):
            default = self._parse_conjunction()

        if not self._match(TokenType.END):
            self.raise_error("Expected END after CASE", self._prev)

        return self._parse_window(
            self.expression(exp.Case, this=expression, ifs=ifs, default=default)
        )
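
    # Illustrative usage, not part of the original source: `_parse_case` collects each
    # WHEN/THEN pair into exp.If nodes under an exp.Case. A minimal sketch, assuming the
    # public `sqlglot.parse_one` entry point:
    #
    #   >>> import sqlglot
    #   >>> case = sqlglot.parse_one("SELECT CASE WHEN a > 0 THEN 1 ELSE 0 END FROM t").find(sqlglot.exp.Case)
    #   >>> len(case.args["ifs"])  # => 1, with args["default"] holding the literal 0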

    def _parse_if(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.L_PAREN):
            args = self._parse_csv(self._parse_conjunction)
            this = self.validate_expression(exp.If.from_arg_list(args), args)
            self._match_r_paren()
        else:
            index = self._index - 1
            condition = self._parse_conjunction()

            if not condition:
                self._retreat(index)
                return None

            self._match(TokenType.THEN)
            true = self._parse_conjunction()
            false = self._parse_conjunction() if self._match(TokenType.ELSE) else None
            self._match(TokenType.END)
            this = self.expression(exp.If, this=condition, true=true, false=false)

        return self._parse_window(this)

    def _parse_extract(self) -> exp.Extract:
        this = self._parse_function() or self._parse_var() or self._parse_type()

        if self._match(TokenType.FROM):
            return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

        if not self._match(TokenType.COMMA):
            self.raise_error("Expected FROM or comma after EXTRACT", self._prev)

        return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

    def _parse_any_value(self) -> exp.AnyValue:
        this = self._parse_lambda()
        is_max = None
        having = None

        if self._match(TokenType.HAVING):
            self._match_texts(("MAX", "MIN"))
            is_max = self._prev.text == "MAX"
            having = self._parse_column()

        return self.expression(exp.AnyValue, this=this, having=having, max=is_max)

    def _parse_cast(self, strict: bool) -> exp.Expression:
        this = self._parse_conjunction()

        if not self._match(TokenType.ALIAS):
            if self._match(TokenType.COMMA):
                return self.expression(
                    exp.CastToStrType, this=this, expression=self._parse_string()
                )
            else:
                self.raise_error("Expected AS after CAST")

        fmt = None
        to = self._parse_types()

        if not to:
            self.raise_error("Expected TYPE after CAST")
        elif to.this == exp.DataType.Type.CHAR:
            if self._match(TokenType.CHARACTER_SET):
                to = self.expression(exp.CharacterSet, this=self._parse_var_or_string())
        elif self._match(TokenType.FORMAT):
            fmt_string = self._parse_string()
            fmt = self._parse_at_time_zone(fmt_string)

            if to.this in exp.DataType.TEMPORAL_TYPES:
                this = self.expression(
                    exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime,
                    this=this,
                    format=exp.Literal.string(
                        format_time(
                            fmt_string.this if fmt_string else "",
                            self.FORMAT_MAPPING or self.TIME_MAPPING,
                            self.FORMAT_TRIE or self.TIME_TRIE,
                        )
                    ),
                )

                if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime):
                    this.set("zone", fmt.args["zone"])

                return this

        return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, format=fmt)
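
    # Illustrative usage, not part of the original source: when a dialect supports
    # CAST(... AS <temporal type> FORMAT '...'), `_parse_cast` rewrites the call into
    # exp.StrToDate / exp.StrToTime. A minimal sketch, assuming Teradata input and its
    # FORMAT_MAPPING:
    #
    #   >>> import sqlglot
    #   >>> sql = "SELECT CAST(x AS DATE FORMAT 'YYYY-MM-DD') FROM t"
    #   >>> sqlglot.parse_one(sql, read="teradata").find(sqlglot.exp.StrToDate)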

    def _parse_concat(self) -> t.Optional[exp.Expression]:
        args = self._parse_csv(self._parse_conjunction)
        if self.CONCAT_NULL_OUTPUTS_STRING:
            args = [
                exp.func("COALESCE", exp.cast(arg, "text"), exp.Literal.string(""))
                for arg in args
                if arg
            ]

        # Some dialects (e.g. Trino) don't allow a single-argument CONCAT call, so when
        # we find such a call we replace it with its argument.
        if len(args) == 1:
            return args[0]

        return self.expression(
            exp.Concat if self.STRICT_STRING_CONCAT else exp.SafeConcat, expressions=args
        )

    def _parse_string_agg(self) -> exp.Expression:
        if self._match(TokenType.DISTINCT):
            args: t.List[t.Optional[exp.Expression]] = [
                self.expression(exp.Distinct, expressions=[self._parse_conjunction()])
            ]
            if self._match(TokenType.COMMA):
                args.extend(self._parse_csv(self._parse_conjunction))
        else:
            args = self._parse_csv(self._parse_conjunction)

        index = self._index
        if not self._match(TokenType.R_PAREN):
            # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]])
            return self.expression(
                exp.GroupConcat,
                this=seq_get(args, 0),
                separator=self._parse_order(this=seq_get(args, 1)),
            )

        # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]).
        # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that
        # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them.
        if not self._match_text_seq("WITHIN", "GROUP"):
            self._retreat(index)
            return self.validate_expression(exp.GroupConcat.from_arg_list(args), args)

        self._match_l_paren()  # The corresponding match_r_paren will be called in parse_function (caller)
        order = self._parse_order(this=seq_get(args, 0))
        return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1))

    def _parse_convert(self, strict: bool) -> t.Optional[exp.Expression]:
        this = self._parse_bitwise()

        if self._match(TokenType.USING):
            to: t.Optional[exp.Expression] = self.expression(
                exp.CharacterSet, this=self._parse_var()
            )
        elif self._match(TokenType.COMMA):
            to = self._parse_types()
        else:
            to = None

        return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to)
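
    # Illustrative usage, not part of the original source: `_parse_concat` collapses a
    # single-argument CONCAT into its argument, as the comment above describes. A minimal
    # sketch, assuming the public `sqlglot.transpile` entry point:
    #
    #   >>> import sqlglot
    #   >>> sqlglot.transpile("SELECT CONCAT(a) FROM t")  # => ['SELECT a FROM t']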
        """
        args = self._parse_csv(self._parse_conjunction)

        if len(args) < 3:
            return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1))

        expression, *expressions = args
        if not expression:
            return None

        ifs = []
        for search, result in zip(expressions[::2], expressions[1::2]):
            if not search or not result:
                return None

            if isinstance(search, exp.Literal):
                ifs.append(
                    exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result)
                )
            elif isinstance(search, exp.Null):
                ifs.append(
                    exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result)
                )
            else:
                cond = exp.or_(
                    exp.EQ(this=expression.copy(), expression=search),
                    exp.and_(
                        exp.Is(this=expression.copy(), expression=exp.Null()),
                        exp.Is(this=search.copy(), expression=exp.Null()),
                        copy=False,
                    ),
                    copy=False,
                )
                ifs.append(exp.If(this=cond, true=result))

        return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None)

    def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]:
        self._match_text_seq("KEY")
        key = self._parse_field()
        self._match(TokenType.COLON)
        self._match_text_seq("VALUE")
        value = self._parse_field()

        if not key and not value:
            return None
        return self.expression(exp.JSONKeyValue, this=key, expression=value)

    def _parse_json_object(self) -> exp.JSONObject:
        star = self._parse_star()
        expressions = [star] if star else self._parse_csv(self._parse_json_key_value)

        null_handling = None
        if self._match_text_seq("NULL", "ON", "NULL"):
            null_handling = "NULL ON NULL"
        elif self._match_text_seq("ABSENT", "ON", "NULL"):
            null_handling = "ABSENT ON NULL"

        unique_keys = None
        if self._match_text_seq("WITH", "UNIQUE"):
            unique_keys = True
        elif self._match_text_seq("WITHOUT", "UNIQUE"):
            unique_keys = False

        self._match_text_seq("KEYS")

        return_type = self._match_text_seq("RETURNING") and self._parse_type()
        format_json = self._match_text_seq("FORMAT", "JSON")
        encoding = self._match_text_seq("ENCODING") and self._parse_var()

        return self.expression(
            exp.JSONObject,
            expressions=expressions,
            null_handling=null_handling,
            unique_keys=unique_keys,
            return_type=return_type,
            format_json=format_json,
            encoding=encoding,
        )

    def _parse_logarithm(self) -> exp.Func:
        # Default argument order is base, expression
        args = self._parse_csv(self._parse_range)

        if len(args) > 1:
            if not self.LOG_BASE_FIRST:
                args.reverse()
            return exp.Log.from_arg_list(args)

        return self.expression(
            exp.Ln if self.LOG_DEFAULTS_TO_LN else exp.Log, this=seq_get(args, 0)
        )
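
    # Illustrative usage, not part of the original source: the search/result variant of
    # DECODE is parsed into the CASE shape described in the docstring above. A minimal
    # sketch, assuming Oracle input:
    #
    #   >>> import sqlglot
    #   >>> sqlglot.transpile("SELECT DECODE(x, 1, 'one', 'other') FROM t", read="oracle")
    #   # => ["SELECT CASE WHEN x = 1 THEN 'one' ELSE 'other' END FROM t"]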

    def _parse_match_against(self) -> exp.MatchAgainst:
        expressions = self._parse_csv(self._parse_column)

        self._match_text_seq(")", "AGAINST", "(")

        this = self._parse_string()

        if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"):
            modifier = "IN NATURAL LANGUAGE MODE"
            if self._match_text_seq("WITH", "QUERY", "EXPANSION"):
                modifier = f"{modifier} WITH QUERY EXPANSION"
        elif self._match_text_seq("IN", "BOOLEAN", "MODE"):
            modifier = "IN BOOLEAN MODE"
        elif self._match_text_seq("WITH", "QUERY", "EXPANSION"):
            modifier = "WITH QUERY EXPANSION"
        else:
            modifier = None

        return self.expression(
            exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier
        )

    # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16
    def _parse_open_json(self) -> exp.OpenJSON:
        this = self._parse_bitwise()
        path = self._match(TokenType.COMMA) and self._parse_string()

        def _parse_open_json_column_def() -> exp.OpenJSONColumnDef:
            this = self._parse_field(any_token=True)
            kind = self._parse_types()
            path = self._parse_string()
            as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON)

            return self.expression(
                exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json
            )

        expressions = None
        if self._match_pair(TokenType.R_PAREN, TokenType.WITH):
            self._match_l_paren()
            expressions = self._parse_csv(_parse_open_json_column_def)

        return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions)

    def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition:
        args = self._parse_csv(self._parse_bitwise)

        if self._match(TokenType.IN):
            return self.expression(
                exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0)
            )

        if haystack_first:
            haystack = seq_get(args, 0)
            needle = seq_get(args, 1)
        else:
            needle = seq_get(args, 0)
            haystack = seq_get(args, 1)

        return self.expression(
            exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2)
        )

    def _parse_join_hint(self, func_name: str) -> exp.JoinHint:
        args = self._parse_csv(self._parse_table)
        return exp.JoinHint(this=func_name.upper(), expressions=args)

    def _parse_substring(self) -> exp.Substring:
        # Postgres supports the form: substring(string [from int] [for int])
        # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6
        args = self._parse_csv(self._parse_bitwise)

        if self._match(TokenType.FROM):
            args.append(self._parse_bitwise())
            if self._match(TokenType.FOR):
                args.append(self._parse_bitwise())

        return self.validate_expression(exp.Substring.from_arg_list(args), args)

    def _parse_trim(self) -> exp.Trim:
        # https://www.w3resource.com/sql/character-functions/trim.php
        # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html
        position = None
        collation = None

        if self._match_texts(self.TRIM_TYPES):
            position = self._prev.text.upper()

        expression = self._parse_bitwise()
        if self._match_set((TokenType.FROM, TokenType.COMMA)):
            this = self._parse_bitwise()
        else:
            this = expression
            expression = None

        if self._match(TokenType.COLLATE):
            collation = self._parse_bitwise()

        return self.expression(
            exp.Trim, this=this, position=position, expression=expression, collation=collation
        )

    def _parse_window_clause(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]:
        return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window)

    def _parse_named_window(self) -> t.Optional[exp.Expression]:
        return self._parse_window(self._parse_id_var(), alias=True)
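
    # Illustrative usage, not part of the original source: `_parse_substring` normalizes
    # the Postgres keyword form into an ordinary argument list. A minimal sketch,
    # assuming the public `sqlglot.parse_one` entry point:
    #
    #   >>> import sqlglot
    #   >>> sub = sqlglot.parse_one("SELECT SUBSTRING(s FROM 2 FOR 3)").find(sqlglot.exp.Substring)
    #   >>> sub.args["start"], sub.args["length"]  # => the literals 2 and 3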

    def _parse_respect_or_ignore_nulls(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        if self._match_text_seq("IGNORE", "NULLS"):
            return self.expression(exp.IgnoreNulls, this=this)
        if self._match_text_seq("RESPECT", "NULLS"):
            return self.expression(exp.RespectNulls, this=this)
        return this

    def _parse_window(
        self, this: t.Optional[exp.Expression], alias: bool = False
    ) -> t.Optional[exp.Expression]:
        if self._match_pair(TokenType.FILTER, TokenType.L_PAREN):
            self._match(TokenType.WHERE)
            this = self.expression(
                exp.Filter, this=this, expression=self._parse_where(skip_where_token=True)
            )
            self._match_r_paren()

        # T-SQL allows the OVER (...) syntax after WITHIN GROUP.
        # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16
        if self._match_text_seq("WITHIN", "GROUP"):
            order = self._parse_wrapped(self._parse_order)
            this = self.expression(exp.WithinGroup, this=this, expression=order)

        # The SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER clause;
        # some dialects choose to implement it and some do not.
        # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html

        # There is some code above in _parse_lambda that handles
        #   SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ...

        # The code below handles
        #   SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ...

        # Oracle allows both formats
        #   (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html)
        # and Snowflake chose to do the same for familiarity:
        #   https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes
        this = self._parse_respect_or_ignore_nulls(this)

        # BigQuery allows SELECT ... FROM ... WINDOW x AS (PARTITION BY ...)
        if alias:
            over = None
            self._match(TokenType.ALIAS)
        elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS):
            return this
        else:
            over = self._prev.text.upper()

        if not self._match(TokenType.L_PAREN):
            return self.expression(
                exp.Window, this=this, alias=self._parse_id_var(False), over=over
            )

        window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS)

        first = self._match(TokenType.FIRST)
        if self._match_text_seq("LAST"):
            first = False

        partition = self._parse_partition_by()
        order = self._parse_order()
        kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text

        if kind:
            self._match(TokenType.BETWEEN)
            start = self._parse_window_spec()
            self._match(TokenType.AND)
            end = self._parse_window_spec()

            spec = self.expression(
                exp.WindowSpec,
                kind=kind,
                start=start["value"],
                start_side=start["side"],
                end=end["value"],
                end_side=end["side"],
            )
        else:
            spec = None

        self._match_r_paren()

        window = self.expression(
            exp.Window,
            this=this,
            partition_by=partition,
            order=order,
            spec=spec,
            alias=window_alias,
            over=over,
            first=first,
        )

        # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...)
        if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False):
            return self._parse_window(window, alias=alias)

        return window
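
    # Illustrative usage, not part of the original source: `_parse_window` builds the
    # exp.Window / exp.WindowSpec pair for OVER clauses. A minimal sketch, assuming the
    # public `sqlglot.parse_one` entry point:
    #
    #   >>> import sqlglot
    #   >>> sql = "SELECT SUM(x) OVER (PARTITION BY y ORDER BY z ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) FROM t"
    #   >>> win = sqlglot.parse_one(sql).find(sqlglot.exp.Window)
    #   >>> win.args["spec"].args["start"]  # => 'UNBOUNDED'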

    def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]:
        self._match(TokenType.BETWEEN)

        return {
            "value": (
                (self._match_text_seq("UNBOUNDED") and "UNBOUNDED")
                or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW")
                or self._parse_bitwise()
            ),
            "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text,
        }

    def _parse_alias(
        self, this: t.Optional[exp.Expression], explicit: bool = False
    ) -> t.Optional[exp.Expression]:
        any_token = self._match(TokenType.ALIAS)

        if explicit and not any_token:
            return this

        if self._match(TokenType.L_PAREN):
            aliases = self.expression(
                exp.Aliases,
                this=this,
                expressions=self._parse_csv(lambda: self._parse_id_var(any_token)),
            )
            self._match_r_paren(aliases)
            return aliases

        alias = self._parse_id_var(any_token)

        if alias:
            return self.expression(exp.Alias, this=this, alias=alias)

        return this

    def _parse_id_var(
        self,
        any_token: bool = True,
        tokens: t.Optional[t.Collection[TokenType]] = None,
    ) -> t.Optional[exp.Expression]:
        identifier = self._parse_identifier()

        if identifier:
            return identifier

        if (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS):
            quoted = self._prev.token_type == TokenType.STRING
            return exp.Identifier(this=self._prev.text, quoted=quoted)

        return None

    def _parse_string(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.STRING):
            return self.PRIMARY_PARSERS[TokenType.STRING](self, self._prev)
        return self._parse_placeholder()

    def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]:
        return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True)

    def _parse_number(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.NUMBER):
            return self.PRIMARY_PARSERS[TokenType.NUMBER](self, self._prev)
        return self._parse_placeholder()

    def _parse_identifier(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.IDENTIFIER):
            return self.expression(exp.Identifier, this=self._prev.text, quoted=True)
        return self._parse_placeholder()

    def _parse_var(
        self, any_token: bool = False, tokens: t.Optional[t.Collection[TokenType]] = None
    ) -> t.Optional[exp.Expression]:
        if (
            (any_token and self._advance_any())
            or self._match(TokenType.VAR)
            or (self._match_set(tokens) if tokens else False)
        ):
            return self.expression(exp.Var, this=self._prev.text)
        return self._parse_placeholder()

    def _advance_any(self) -> t.Optional[Token]:
        if self._curr and self._curr.token_type not in self.RESERVED_KEYWORDS:
            self._advance()
            return self._prev
        return None

    def _parse_var_or_string(self) -> t.Optional[exp.Expression]:
        return self._parse_var() or self._parse_string()

    def _parse_null(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.NULL):
            return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev)
        return None

    def _parse_boolean(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.TRUE):
            return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev)
        if self._match(TokenType.FALSE):
            return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev)
        return None

    def _parse_star(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.STAR):
            return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev)
        return None

    def _parse_parameter(self) -> exp.Parameter:
        wrapped = self._match(TokenType.L_BRACE)
        this = self._parse_var() or self._parse_identifier() or self._parse_primary()
        self._match(TokenType.R_BRACE)
        return self.expression(exp.Parameter, this=this, wrapped=wrapped)

    def _parse_placeholder(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.PLACEHOLDER_PARSERS):
            placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self)
            if placeholder:
                return placeholder
            self._advance(-1)
        return None

    def _parse_except(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]:
        if not self._match(TokenType.EXCEPT):
            return None
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_column)
        return self._parse_csv(self._parse_column)

    def _parse_replace(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]:
        if not self._match(TokenType.REPLACE):
            return None
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_expression)
        return self._parse_expressions()

    def _parse_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA
    ) -> t.List[t.Optional[exp.Expression]]:
        parse_result = parse_method()
        items = [parse_result] if parse_result is not None else []

        while self._match(sep):
            self._add_comments(parse_result)
            parse_result = parse_method()
            if parse_result is not None:
                items.append(parse_result)

        return items

    def _parse_tokens(
        self, parse_method: t.Callable, expressions: t.Dict
    ) -> t.Optional[exp.Expression]:
        this = parse_method()

        while self._match_set(expressions):
            this = self.expression(
                expressions[self._prev.token_type],
                this=this,
                comments=self._prev_comments,
                expression=parse_method(),
            )

        return this

    def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[t.Optional[exp.Expression]]:
        return self._parse_wrapped_csv(self._parse_id_var, optional=optional)

    def _parse_wrapped_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False
    ) -> t.List[t.Optional[exp.Expression]]:
        return self._parse_wrapped(
            lambda: self._parse_csv(parse_method, sep=sep), optional=optional
        )

    def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any:
        wrapped = self._match(TokenType.L_PAREN)
        if not wrapped and not optional:
            self.raise_error("Expecting (")
        parse_result = parse_method()
        if wrapped:
            self._match_r_paren()
        return parse_result

    def _parse_expressions(self) -> t.List[t.Optional[exp.Expression]]:
        return self._parse_csv(self._parse_expression)

    def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]:
        return self._parse_select() or self._parse_set_operations(
            self._parse_expression() if alias else self._parse_conjunction()
        )
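
    # Illustrative usage, not part of the original source: PLACEHOLDER_PARSERS plus
    # `_parse_placeholder` let `:name`-style bind parameters survive a round trip. A
    # minimal sketch, assuming the public `sqlglot.parse_one` entry point:
    #
    #   >>> import sqlglot
    #   >>> sqlglot.parse_one("SELECT :x").find(sqlglot.exp.Placeholder)  # this == 'x'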

    def _parse_ddl_select(self) -> t.Optional[exp.Expression]:
        return self._parse_query_modifiers(
            self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False))
        )

    def _parse_transaction(self) -> exp.Transaction | exp.Command:
        this = None
        if self._match_texts(self.TRANSACTION_KIND):
            this = self._prev.text

        self._match_texts({"TRANSACTION", "WORK"})

        modes = []
        while True:
            mode = []
            while self._match(TokenType.VAR):
                mode.append(self._prev.text)

            if mode:
                modes.append(" ".join(mode))
            if not self._match(TokenType.COMMA):
                break

        return self.expression(exp.Transaction, this=this, modes=modes)

    def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback:
        chain = None
        savepoint = None
        is_rollback = self._prev.token_type == TokenType.ROLLBACK

        self._match_texts({"TRANSACTION", "WORK"})

        if self._match_text_seq("TO"):
            self._match_text_seq("SAVEPOINT")
            savepoint = self._parse_id_var()

        if self._match(TokenType.AND):
            chain = not self._match_text_seq("NO")
            self._match_text_seq("CHAIN")

        if is_rollback:
            return self.expression(exp.Rollback, savepoint=savepoint)

        return self.expression(exp.Commit, chain=chain)

    def _parse_add_column(self) -> t.Optional[exp.Expression]:
        if not self._match_text_seq("ADD"):
            return None

        self._match(TokenType.COLUMN)
        exists_column = self._parse_exists(not_=True)
        expression = self._parse_column_def(self._parse_field(any_token=True))

        if expression:
            expression.set("exists", exists_column)

            # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns
            if self._match_texts(("FIRST", "AFTER")):
                position = self._prev.text
                column_position = self.expression(
                    exp.ColumnPosition, this=self._parse_column(), position=position
                )
                expression.set("position", column_position)

        return expression

    def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]:
        drop = self._match(TokenType.DROP) and self._parse_drop()
        if drop and not isinstance(drop, exp.Command):
            drop.set("kind", drop.args.get("kind", "COLUMN"))
        return drop

    # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html
    def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition:
        return self.expression(
            exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists
        )

    def _parse_add_constraint(self) -> exp.AddConstraint:
        this = None
        kind = self._prev.token_type

        if kind == TokenType.CONSTRAINT:
            this = self._parse_id_var()

            if self._match_text_seq("CHECK"):
                expression = self._parse_wrapped(self._parse_conjunction)
                enforced = self._match_text_seq("ENFORCED")

                return self.expression(
                    exp.AddConstraint, this=this, expression=expression, enforced=enforced
                )

        if kind == TokenType.FOREIGN_KEY or self._match(TokenType.FOREIGN_KEY):
            expression = self._parse_foreign_key()
        elif kind == TokenType.PRIMARY_KEY or self._match(TokenType.PRIMARY_KEY):
            expression = self._parse_primary_key()
        else:
            expression = None

        return self.expression(exp.AddConstraint, this=this, expression=expression)
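
    # Illustrative usage, not part of the original source: `_parse_commit_or_rollback`
    # covers the optional TRANSACTION/WORK keywords, savepoints and AND [NO] CHAIN. A
    # minimal sketch, assuming the public `sqlglot.parse_one` entry point:
    #
    #   >>> import sqlglot
    #   >>> sqlglot.parse_one("COMMIT AND CHAIN").args["chain"]            # => True
    #   >>> sqlglot.parse_one("ROLLBACK TO SAVEPOINT s1").args["savepoint"]  # => Identifier s1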

    def _parse_alter_table_add(self) -> t.List[t.Optional[exp.Expression]]:
        index = self._index - 1

        if self._match_set(self.ADD_CONSTRAINT_TOKENS):
            return self._parse_csv(self._parse_add_constraint)

        self._retreat(index)
        return self._parse_csv(self._parse_add_column)

    def _parse_alter_table_alter(self) -> exp.AlterColumn:
        self._match(TokenType.COLUMN)
        column = self._parse_field(any_token=True)

        if self._match_pair(TokenType.DROP, TokenType.DEFAULT):
            return self.expression(exp.AlterColumn, this=column, drop=True)
        if self._match_pair(TokenType.SET, TokenType.DEFAULT):
            return self.expression(exp.AlterColumn, this=column, default=self._parse_conjunction())

        self._match_text_seq("SET", "DATA")
        return self.expression(
            exp.AlterColumn,
            this=column,
            dtype=self._match_text_seq("TYPE") and self._parse_types(),
            collate=self._match(TokenType.COLLATE) and self._parse_term(),
            using=self._match(TokenType.USING) and self._parse_conjunction(),
        )

    def _parse_alter_table_drop(self) -> t.List[t.Optional[exp.Expression]]:
        index = self._index - 1

        partition_exists = self._parse_exists()
        if self._match(TokenType.PARTITION, advance=False):
            return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists))

        self._retreat(index)
        return self._parse_csv(self._parse_drop_column)

    def _parse_alter_table_rename(self) -> exp.RenameTable:
        self._match_text_seq("TO")
        return self.expression(exp.RenameTable, this=self._parse_table(schema=True))

    def _parse_alter(self) -> exp.AlterTable | exp.Command:
        start = self._prev

        if not self._match(TokenType.TABLE):
            return self._parse_as_command(start)

        exists = self._parse_exists()
        this = self._parse_table(schema=True)

        if self._next:
            self._advance()

        parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None
        if parser:
            actions = ensure_list(parser(self))

            if not self._curr:
                return self.expression(
                    exp.AlterTable,
                    this=this,
                    exists=exists,
                    actions=actions,
                )
        return self._parse_as_command(start)
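
    # Illustrative usage, not part of the original source: `_parse_alter` dispatches on
    # the keyword after the table name via ALTER_PARSERS. A minimal sketch, assuming the
    # public `sqlglot.parse_one` entry point:
    #
    #   >>> import sqlglot
    #   >>> alter = sqlglot.parse_one("ALTER TABLE t ADD COLUMN c INT")
    #   >>> type(alter), type(alter.args["actions"][0])  # => (AlterTable, ColumnDef)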

    def _parse_merge(self) -> exp.Merge:
        self._match(TokenType.INTO)
        target = self._parse_table()

        self._match(TokenType.USING)
        using = self._parse_table()

        self._match(TokenType.ON)
        on = self._parse_conjunction()

        whens = []
        while self._match(TokenType.WHEN):
            matched = not self._match(TokenType.NOT)
            self._match_text_seq("MATCHED")
            source = (
                False
                if self._match_text_seq("BY", "TARGET")
                else self._match_text_seq("BY", "SOURCE")
            )
            condition = self._parse_conjunction() if self._match(TokenType.AND) else None

            self._match(TokenType.THEN)

            if self._match(TokenType.INSERT):
                _this = self._parse_star()
                if _this:
                    then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=_this)
                else:
                    then = self.expression(
                        exp.Insert,
                        this=self._parse_value(),
                        expression=self._match(TokenType.VALUES) and self._parse_value(),
                    )
            elif self._match(TokenType.UPDATE):
                expressions = self._parse_star()
                if expressions:
                    then = self.expression(exp.Update, expressions=expressions)
                else:
                    then = self.expression(
                        exp.Update,
                        expressions=self._match(TokenType.SET)
                        and self._parse_csv(self._parse_equality),
                    )
            elif self._match(TokenType.DELETE):
                then = self.expression(exp.Var, this=self._prev.text)
            else:
                then = None

            whens.append(
                self.expression(
                    exp.When,
                    matched=matched,
                    source=source,
                    condition=condition,
                    then=then,
                )
            )

        return self.expression(
            exp.Merge,
            this=target,
            using=using,
            on=on,
            expressions=whens,
        )

    def _parse_show(self) -> t.Optional[exp.Expression]:
        parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE)
        if parser:
            return parser(self)
        self._advance()
        return self.expression(exp.Show, this=self._prev.text.upper())

    def _parse_set_item_assignment(
        self, kind: t.Optional[str] = None
    ) -> t.Optional[exp.Expression]:
        index = self._index

        if kind in {"GLOBAL", "SESSION"} and self._match_text_seq("TRANSACTION"):
            return self._parse_set_transaction(global_=kind == "GLOBAL")

        left = self._parse_primary() or self._parse_id_var()

        if not self._match_texts(("=", "TO")):
            self._retreat(index)
            return None

        right = self._parse_statement() or self._parse_id_var()
        this = self.expression(exp.EQ, this=left, expression=right)

        return self.expression(exp.SetItem, this=this, kind=kind)

    def _parse_set_transaction(self, global_: bool = False) -> exp.Expression:
        self._match_text_seq("TRANSACTION")
        characteristics = self._parse_csv(
            lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS)
        )
        return self.expression(
            exp.SetItem,
            expressions=characteristics,
            kind="TRANSACTION",
            **{"global": global_},  # type: ignore
        )

    def _parse_set_item(self) -> t.Optional[exp.Expression]:
        parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE)
        return parser(self) if parser else self._parse_set_item_assignment(kind=None)

    def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command:
        index = self._index
        set_ = self.expression(
            exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag
        )

        if self._curr:
            self._retreat(index)
            return self._parse_as_command(self._prev)

        return set_

    def _parse_var_from_options(self, options: t.Collection[str]) -> t.Optional[exp.Var]:
        for option in options:
            if self._match_text_seq(*option.split(" ")):
                return exp.var(option)
        return None

    def _parse_as_command(self, start: Token) -> exp.Command:
        while self._curr:
            self._advance()
        text = self._find_sql(start, self._prev)
        size = len(start.text)
        return exp.Command(this=text[:size], expression=text[size:])

    def _parse_dict_property(self, this: str) -> exp.DictProperty:
        settings = []

        self._match_l_paren()
        kind = self._parse_id_var()

        if self._match(TokenType.L_PAREN):
            while True:
                key = self._parse_id_var()
                value = self._parse_primary()

                if not key and value is None:
                    break
                settings.append(self.expression(exp.DictSubProperty, this=key, value=value))
            self._match(TokenType.R_PAREN)

        self._match_r_paren()

        return self.expression(
            exp.DictProperty,
            this=this,
            kind=kind.this if kind else None,
            settings=settings,
        )
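
    # Illustrative usage, not part of the original source: `_parse_set` retreats and
    # degrades to a generic exp.Command when any SET item fails to parse fully, which
    # keeps dialect-specific SET statements round-trippable. A minimal sketch:
    #
    #   >>> import sqlglot
    #   >>> type(sqlglot.parse_one("SET x = 1"))  # => exp.Set, with one exp.SetItem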

    def _parse_dict_range(self, this: str) -> exp.DictRange:
        self._match_l_paren()
        has_min = self._match_text_seq("MIN")
        if has_min:
            min = self._parse_var() or self._parse_primary()
            self._match_text_seq("MAX")
            max = self._parse_var() or self._parse_primary()
        else:
            max = self._parse_var() or self._parse_primary()
            min = exp.Literal.number(0)
        self._match_r_paren()
        return self.expression(exp.DictRange, this=this, min=min, max=max)

    def _find_parser(
        self, parsers: t.Dict[str, t.Callable], trie: t.Dict
    ) -> t.Optional[t.Callable]:
        if not self._curr:
            return None

        index = self._index
        this = []
        while True:
            # The current token might be multiple words
            curr = self._curr.text.upper()
            key = curr.split(" ")
            this.append(curr)

            self._advance()
            result, trie = in_trie(trie, key)
            if result == TrieResult.FAILED:
                break

            if result == TrieResult.EXISTS:
                subparser = parsers[" ".join(this)]
                return subparser

        self._retreat(index)
        return None

    def _match(self, token_type, advance=True, expression=None):
        if not self._curr:
            return None

        if self._curr.token_type == token_type:
            if advance:
                self._advance()
            self._add_comments(expression)
            return True

        return None

    def _match_set(self, types, advance=True):
        if not self._curr:
            return None

        if self._curr.token_type in types:
            if advance:
                self._advance()
            return True

        return None

    def _match_pair(self, token_type_a, token_type_b, advance=True):
        if not self._curr or not self._next:
            return None

        if self._curr.token_type == token_type_a and self._next.token_type == token_type_b:
            if advance:
                self._advance(2)
            return True

        return None

    def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None:
        if not self._match(TokenType.L_PAREN, expression=expression):
            self.raise_error("Expecting (")

    def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None:
        if not self._match(TokenType.R_PAREN, expression=expression):
            self.raise_error("Expecting )")

    def _match_texts(self, texts, advance=True):
        if self._curr and self._curr.text.upper() in texts:
            if advance:
                self._advance()
            return True
        return False

    def _match_text_seq(self, *texts, advance=True):
        index = self._index
        for text in texts:
            if self._curr and self._curr.text.upper() == text:
                self._advance()
            else:
                self._retreat(index)
                return False

        if not advance:
            self._retreat(index)

        return True
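
    # Illustrative usage, not part of the original source: `_find_parser` walks the
    # SHOW/SET tries built by the `_Parser` metaclass to match multi-word commands. A
    # minimal sketch of the underlying helpers, assuming sqlglot.trie's public API:
    #
    #   >>> from sqlglot.trie import TrieResult, in_trie, new_trie
    #   >>> trie = new_trie(key.split(" ") for key in {"SHOW TABLES"})
    #   >>> in_trie(trie, ["SHOW"])[0]            # => TrieResult.PREFIX
    #   >>> in_trie(trie, ["SHOW", "TABLES"])[0]  # => TrieResult.EXISTS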

    @t.overload
    def _replace_columns_with_dots(self, this: exp.Expression) -> exp.Expression:
        ...

    @t.overload
    def _replace_columns_with_dots(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        ...

    def _replace_columns_with_dots(self, this):
        if isinstance(this, exp.Dot):
            exp.replace_children(this, self._replace_columns_with_dots)
        elif isinstance(this, exp.Column):
            exp.replace_children(this, self._replace_columns_with_dots)
            table = this.args.get("table")
            this = (
                self.expression(exp.Dot, this=table, expression=this.this) if table else this.this
            )

        return this

    def _replace_lambda(
        self, node: t.Optional[exp.Expression], lambda_variables: t.Set[str]
    ) -> t.Optional[exp.Expression]:
        if not node:
            return node

        for column in node.find_all(exp.Column):
            if column.parts[0].name in lambda_variables:
                dot_or_id = column.to_dot() if column.table else column.this
                parent = column.parent

                while isinstance(parent, exp.Dot):
                    if not isinstance(parent.parent, exp.Dot):
                        parent.replace(dot_or_id)
                        break
                    parent = parent.parent
                else:
                    if column is node:
                        node = dot_or_id
                    else:
                        column.replace(dot_or_id)

        return node
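
    # Illustrative usage, not part of the original source: `_replace_lambda` rewrites
    # references to lambda parameters so they stop being treated as table columns. A
    # minimal sketch, assuming the public `sqlglot.parse_one` entry point:
    #
    #   >>> import sqlglot
    #   >>> lamb = sqlglot.parse_one("SELECT FILTER(arr, x -> x > 0)").find(sqlglot.exp.Lambda)
    #   >>> type(lamb.this.this)  # the `x` in `x > 0` is an Identifier, not a Column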

    # Tokens that can represent identifiers
    ID_VAR_TOKENS = {
        TokenType.VAR,
        TokenType.ANTI,
        TokenType.APPLY,
        TokenType.ASC,
        TokenType.AUTO_INCREMENT,
        TokenType.BEGIN,
        TokenType.CACHE,
        TokenType.CASE,
        TokenType.COLLATE,
        TokenType.COMMAND,
        TokenType.COMMENT,
        TokenType.COMMIT,
        TokenType.CONSTRAINT,
        TokenType.DEFAULT,
        TokenType.DELETE,
        TokenType.DESC,
        TokenType.DESCRIBE,
        TokenType.DICTIONARY,
        TokenType.DIV,
        TokenType.END,
        TokenType.EXECUTE,
        TokenType.ESCAPE,
        TokenType.FALSE,
        TokenType.FIRST,
        TokenType.FILTER,
        TokenType.FORMAT,
        TokenType.FULL,
        TokenType.IF,
        TokenType.IS,
        TokenType.ISNULL,
        TokenType.INTERVAL,
        TokenType.KEEP,
        TokenType.LEFT,
        TokenType.LOAD,
        TokenType.MERGE,
        TokenType.NATURAL,
        TokenType.NEXT,
        TokenType.OFFSET,
        TokenType.ORDINALITY,
        TokenType.OVERWRITE,
        TokenType.PARTITION,
        TokenType.PERCENT,
        TokenType.PIVOT,
        TokenType.PRAGMA,
        TokenType.RANGE,
        TokenType.REFERENCES,
        TokenType.RIGHT,
        TokenType.ROW,
        TokenType.ROWS,
        TokenType.SEMI,
        TokenType.SET,
        TokenType.SETTINGS,
        TokenType.SHOW,
        TokenType.TEMPORARY,
        TokenType.TOP,
        TokenType.TRUE,
        TokenType.UNIQUE,
        TokenType.UNPIVOT,
        TokenType.UPDATE,
        TokenType.VOLATILE,
        TokenType.WINDOW,
        *CREATABLES,
        *SUBQUERY_PREDICATES,
        *TYPE_TOKENS,
        *NO_PAREN_FUNCTIONS,
    }

    INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END}

    TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - {
        TokenType.APPLY,
        TokenType.ASOF,
        TokenType.FULL,
        TokenType.LEFT,
        TokenType.LOCK,
        TokenType.NATURAL,
        TokenType.OFFSET,
        TokenType.RIGHT,
        TokenType.WINDOW,
    }

    COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS}

    UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET}

    TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"}

    FUNC_TOKENS = {
        TokenType.COMMAND,
        TokenType.CURRENT_DATE,
        TokenType.CURRENT_DATETIME,
        TokenType.CURRENT_TIMESTAMP,
        TokenType.CURRENT_TIME,
        TokenType.CURRENT_USER,
        TokenType.FILTER,
        TokenType.FIRST,
        TokenType.FORMAT,
        TokenType.GLOB,
        TokenType.IDENTIFIER,
        TokenType.INDEX,
        TokenType.ISNULL,
        TokenType.ILIKE,
        TokenType.LIKE,
        TokenType.MERGE,
        TokenType.OFFSET,
        TokenType.PRIMARY_KEY,
        TokenType.RANGE,
        TokenType.REPLACE,
        TokenType.RLIKE,
        TokenType.ROW,
        TokenType.UNNEST,
        TokenType.VAR,
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.DATE,
        TokenType.DATETIME,
        TokenType.TABLE,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.WINDOW,
        TokenType.XOR,
        *TYPE_TOKENS,
        *SUBQUERY_PREDICATES,
    }

    CONJUNCTION = {
        TokenType.AND: exp.And,
        TokenType.OR: exp.Or,
    }

    EQUALITY = {
        TokenType.EQ: exp.EQ,
        TokenType.NEQ: exp.NEQ,
        TokenType.NULLSAFE_EQ: exp.NullSafeEQ,
    }
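
    # Illustrative usage, not part of the original source: because FIRST is listed in
    # ID_VAR_TOKENS above, it can still be used as an ordinary identifier. A minimal
    # sketch, assuming the public `sqlglot.parse_one` entry point:
    #
    #   >>> import sqlglot
    #   >>> sqlglot.parse_one("SELECT 1 AS first").expressions[0].alias  # => 'first'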

    COMPARISON = {
        TokenType.GT: exp.GT,
        TokenType.GTE: exp.GTE,
        TokenType.LT: exp.LT,
        TokenType.LTE: exp.LTE,
    }

    BITWISE = {
        TokenType.AMP: exp.BitwiseAnd,
        TokenType.CARET: exp.BitwiseXor,
        TokenType.PIPE: exp.BitwiseOr,
        TokenType.DPIPE: exp.DPipe,
    }

    TERM = {
        TokenType.DASH: exp.Sub,
        TokenType.PLUS: exp.Add,
        TokenType.MOD: exp.Mod,
        TokenType.COLLATE: exp.Collate,
    }

    FACTOR = {
        TokenType.DIV: exp.IntDiv,
        TokenType.LR_ARROW: exp.Distance,
        TokenType.SLASH: exp.Div,
        TokenType.STAR: exp.Mul,
    }

    TIMESTAMPS = {
        TokenType.TIME,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
    }

    SET_OPERATIONS = {
        TokenType.UNION,
        TokenType.INTERSECT,
        TokenType.EXCEPT,
    }

    JOIN_METHODS = {
        TokenType.NATURAL,
        TokenType.ASOF,
    }

    JOIN_SIDES = {
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.FULL,
    }

    JOIN_KINDS = {
        TokenType.INNER,
        TokenType.OUTER,
        TokenType.CROSS,
        TokenType.SEMI,
        TokenType.ANTI,
    }

    JOIN_HINTS: t.Set[str] = set()

    LAMBDAS = {
        TokenType.ARROW: lambda self, expressions: self.expression(
            exp.Lambda,
            this=self._replace_lambda(
                self._parse_conjunction(),
                {node.name for node in expressions},
            ),
            expressions=expressions,
        ),
        TokenType.FARROW: lambda self, expressions: self.expression(
            exp.Kwarg,
            this=exp.var(expressions[0].name),
            expression=self._parse_conjunction(),
        ),
    }

    COLUMN_OPERATORS = {
        TokenType.DOT: None,
        TokenType.DCOLON: lambda self, this, to: self.expression(
            exp.Cast if self.STRICT_CAST else exp.TryCast,
            this=this,
            to=to,
        ),
        TokenType.ARROW: lambda self, this, path: self.expression(
            exp.JSONExtract,
            this=this,
            expression=path,
        ),
        TokenType.DARROW: lambda self, this, path: self.expression(
            exp.JSONExtractScalar,
            this=this,
            expression=path,
        ),
        TokenType.HASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtract,
            this=this,
            expression=path,
        ),
        TokenType.DHASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtractScalar,
            this=this,
            expression=path,
        ),
        TokenType.PLACEHOLDER: lambda self, this, key: self.expression(
            exp.JSONBContains,
            this=this,
            expression=key,
        ),
    }
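
    # Illustrative usage, not part of the original source: COLUMN_OPERATORS above is why
    # `::` parses as a cast and `->` as a JSON extraction. A minimal sketch, assuming
    # the public `sqlglot.parse_one` entry point:
    #
    #   >>> import sqlglot
    #   >>> sqlglot.parse_one("SELECT a::INT").find(sqlglot.exp.Cast)              # from DCOLON
    #   >>> sqlglot.parse_one("SELECT b -> '$.k'").find(sqlglot.exp.JSONExtract)  # from ARROW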

    EXPRESSION_PARSERS = {
        exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
        exp.Column: lambda self: self._parse_column(),
        exp.Condition: lambda self: self._parse_conjunction(),
        exp.DataType: lambda self: self._parse_types(),
        exp.Expression: lambda self: self._parse_statement(),
        exp.From: lambda self: self._parse_from(),
        exp.Group: lambda self: self._parse_group(),
        exp.Having: lambda self: self._parse_having(),
        exp.Identifier: lambda self: self._parse_id_var(),
        exp.Join: lambda self: self._parse_join(),
        exp.Lambda: lambda self: self._parse_lambda(),
        exp.Lateral: lambda self: self._parse_lateral(),
        exp.Limit: lambda self: self._parse_limit(),
        exp.Offset: lambda self: self._parse_offset(),
        exp.Order: lambda self: self._parse_order(),
        exp.Ordered: lambda self: self._parse_ordered(),
        exp.Properties: lambda self: self._parse_properties(),
        exp.Qualify: lambda self: self._parse_qualify(),
        exp.Returning: lambda self: self._parse_returning(),
        exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY),
        exp.Table: lambda self: self._parse_table_parts(),
        exp.TableAlias: lambda self: self._parse_table_alias(),
        exp.Where: lambda self: self._parse_where(),
        exp.Window: lambda self: self._parse_named_window(),
        exp.With: lambda self: self._parse_with(),
        "JOIN_TYPE": lambda self: self._parse_join_parts(),
    }

    STATEMENT_PARSERS = {
        TokenType.ALTER: lambda self: self._parse_alter(),
        TokenType.BEGIN: lambda self: self._parse_transaction(),
        TokenType.CACHE: lambda self: self._parse_cache(),
        TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(),
        TokenType.COMMENT: lambda self: self._parse_comment(),
        TokenType.CREATE: lambda self: self._parse_create(),
        TokenType.DELETE: lambda self: self._parse_delete(),
        TokenType.DESC: lambda self: self._parse_describe(),
        TokenType.DESCRIBE: lambda self: self._parse_describe(),
        TokenType.DROP: lambda self: self._parse_drop(),
        TokenType.FROM: lambda self: exp.select("*").from_(
            t.cast(exp.From, self._parse_from(skip_from_token=True))
        ),
        TokenType.INSERT: lambda self: self._parse_insert(),
        TokenType.LOAD: lambda self: self._parse_load(),
        TokenType.MERGE: lambda self: self._parse_merge(),
        TokenType.PIVOT: lambda self: self._parse_simplified_pivot(),
        TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()),
        TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(),
        TokenType.SET: lambda self: self._parse_set(),
        TokenType.UNCACHE: lambda self: self._parse_uncache(),
        TokenType.UPDATE: lambda self: self._parse_update(),
        TokenType.USE: lambda self: self.expression(
            exp.Use,
            kind=self._match_texts(("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA"))
            and exp.var(self._prev.text),
            this=self._parse_table(schema=False),
        ),
    }

    UNARY_PARSERS = {
        TokenType.PLUS: lambda self: self._parse_unary(),  # Unary + is handled as a no-op
        TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()),
        TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()),
        TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()),
    }

    PRIMARY_PARSERS = {
        TokenType.STRING: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=True
        ),
        TokenType.NUMBER: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=False
        ),
        TokenType.STAR: lambda self, _: self.expression(
            exp.Star, **{"except": self._parse_except(), "replace": self._parse_replace()}
        ),
        TokenType.NULL: lambda self, _: self.expression(exp.Null),
        TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True),
        TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False),
        TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text),
        TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text),
        TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text),
        TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token),
        TokenType.NATIONAL_STRING: lambda self, token: self.expression(
            exp.National, this=token.text
        ),
        TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text),
        TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(),
    }
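
    # Illustrative usage, not part of the original source: STATEMENT_PARSERS above maps
    # the first meaningful token to a statement parser; note the FROM entry, which turns
    # a bare FROM clause into a full SELECT. A minimal sketch:
    #
    #   >>> import sqlglot
    #   >>> sqlglot.parse_one("FROM tbl").sql()  # => 'SELECT * FROM tbl'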
    UNARY_PARSERS = {
        TokenType.PLUS: lambda self: self._parse_unary(),  # Unary + is handled as a no-op
        TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()),
        TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()),
        TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()),
    }

    PRIMARY_PARSERS = {
        TokenType.STRING: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=True
        ),
        TokenType.NUMBER: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=False
        ),
        TokenType.STAR: lambda self, _: self.expression(
            exp.Star, **{"except": self._parse_except(), "replace": self._parse_replace()}
        ),
        TokenType.NULL: lambda self, _: self.expression(exp.Null),
        TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True),
        TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False),
        TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text),
        TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text),
        TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text),
        TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token),
        TokenType.NATIONAL_STRING: lambda self, token: self.expression(
            exp.National, this=token.text
        ),
        TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text),
        TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(),
    }

    PLACEHOLDER_PARSERS = {
        TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder),
        TokenType.PARAMETER: lambda self: self._parse_parameter(),
        TokenType.COLON: lambda self: self.expression(exp.Placeholder, this=self._prev.text)
        if self._match_set((TokenType.NUMBER, TokenType.VAR))
        else None,
    }

    RANGE_PARSERS = {
        TokenType.BETWEEN: lambda self, this: self._parse_between(this),
        TokenType.GLOB: binary_range_parser(exp.Glob),
        TokenType.ILIKE: binary_range_parser(exp.ILike),
        TokenType.IN: lambda self, this: self._parse_in(this),
        TokenType.IRLIKE: binary_range_parser(exp.RegexpILike),
        TokenType.IS: lambda self, this: self._parse_is(this),
        TokenType.LIKE: binary_range_parser(exp.Like),
        TokenType.OVERLAPS: binary_range_parser(exp.Overlaps),
        TokenType.RLIKE: binary_range_parser(exp.RegexpLike),
        TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo),
    }
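    # Illustrative example: RANGE_PARSERS handles postfix predicates such as
    # BETWEEN, IN and IS. Assuming the default dialect:
    #
    #     import sqlglot
    #     from sqlglot import exp
    #
    #     select = sqlglot.parse_one("SELECT * FROM t WHERE a BETWEEN 1 AND 2")
    #     assert isinstance(select.args["where"].this, exp.Between)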
    PROPERTY_PARSERS: t.Dict[str, t.Callable] = {
        "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty),
        "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty),
        "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(),
        "CHARACTER SET": lambda self: self._parse_character_set(),
        "CHECKSUM": lambda self: self._parse_checksum(),
        "CLUSTER BY": lambda self: self._parse_cluster(),
        "CLUSTERED": lambda self: self._parse_clustered_by(),
        "COLLATE": lambda self: self._parse_property_assignment(exp.CollateProperty),
        "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty),
        "COPY": lambda self: self._parse_copy_property(),
        "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs),
        "DEFINER": lambda self: self._parse_definer(),
        "DETERMINISTIC": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "DISTKEY": lambda self: self._parse_distkey(),
        "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty),
        "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty),
        "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty),
        "EXTERNAL": lambda self: self.expression(exp.ExternalProperty),
        "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs),
        "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "FREESPACE": lambda self: self._parse_freespace(),
        "IMMUTABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs),
        "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty),
        "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"),
        "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"),
        "LIKE": lambda self: self._parse_create_like(),
        "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty),
        "LOCK": lambda self: self._parse_locking(),
        "LOCKING": lambda self: self._parse_locking(),
        "LOG": lambda self, **kwargs: self._parse_log(**kwargs),
        "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty),
        "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs),
        "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True),
        "NO": lambda self: self._parse_no_property(),
        "ON": lambda self: self._parse_on_property(),
        "ORDER BY": lambda self: self._parse_order(skip_order_token=True),
        "PARTITION BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED_BY": lambda self: self._parse_partitioned_by(),
        "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True),
        "RANGE": lambda self: self._parse_dict_range(this="RANGE"),
        "RETURNS": lambda self: self._parse_returns(),
        "ROW": lambda self: self._parse_row(),
        "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty),
        "SET": lambda self: self.expression(exp.SetProperty, multi=False),
        "SETTINGS": lambda self: self.expression(
            exp.SettingsProperty, expressions=self._parse_csv(self._parse_set_item)
        ),
        "SORTKEY": lambda self: self._parse_sortkey(),
        "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"),
        "STABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("STABLE")
        ),
        "STORED": lambda self: self._parse_stored(),
        "TBLPROPERTIES": lambda self: self._parse_wrapped_csv(self._parse_property),
        "TEMP": lambda self: self.expression(exp.TemporaryProperty),
        "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty),
        "TO": lambda self: self._parse_to_table(),
        "TRANSIENT": lambda self: self.expression(exp.TransientProperty),
        "TTL": lambda self: self._parse_ttl(),
        "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "VOLATILE": lambda self: self._parse_volatile_property(),
        "WITH": lambda self: self._parse_with_property(),
    }
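    # Hedged example (assuming the bundled MySQL dialect): a trailing ENGINE clause
    # is routed through PROPERTY_PARSERS["ENGINE"] and surfaces as an
    # exp.EngineProperty on the Create node:
    #
    #     import sqlglot
    #     from sqlglot import exp
    #
    #     create = sqlglot.parse_one("CREATE TABLE t (a INT) ENGINE=InnoDB", read="mysql")
    #     assert create.find(exp.EngineProperty) is not None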
689 "REFERENCES": lambda self: self._parse_references(match=False), 690 "TITLE": lambda self: self.expression( 691 exp.TitleColumnConstraint, this=self._parse_var_or_string() 692 ), 693 "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]), 694 "UNIQUE": lambda self: self._parse_unique(), 695 "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint), 696 } 697 698 ALTER_PARSERS = { 699 "ADD": lambda self: self._parse_alter_table_add(), 700 "ALTER": lambda self: self._parse_alter_table_alter(), 701 "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()), 702 "DROP": lambda self: self._parse_alter_table_drop(), 703 "RENAME": lambda self: self._parse_alter_table_rename(), 704 } 705 706 SCHEMA_UNNAMED_CONSTRAINTS = {"CHECK", "FOREIGN KEY", "LIKE", "PRIMARY KEY", "UNIQUE"} 707 708 NO_PAREN_FUNCTION_PARSERS = { 709 TokenType.ANY: lambda self: self.expression(exp.Any, this=self._parse_bitwise()), 710 TokenType.CASE: lambda self: self._parse_case(), 711 TokenType.IF: lambda self: self._parse_if(), 712 TokenType.NEXT_VALUE_FOR: lambda self: self.expression( 713 exp.NextValueFor, 714 this=self._parse_column(), 715 order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order), 716 ), 717 } 718 719 FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"} 720 721 FUNCTION_PARSERS = { 722 "ANY_VALUE": lambda self: self._parse_any_value(), 723 "CAST": lambda self: self._parse_cast(self.STRICT_CAST), 724 "CONCAT": lambda self: self._parse_concat(), 725 "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST), 726 "DECODE": lambda self: self._parse_decode(), 727 "EXTRACT": lambda self: self._parse_extract(), 728 "JSON_OBJECT": lambda self: self._parse_json_object(), 729 "LOG": lambda self: self._parse_logarithm(), 730 "MATCH": lambda self: self._parse_match_against(), 731 "OPENJSON": lambda self: self._parse_open_json(), 732 "POSITION": lambda self: self._parse_position(), 733 "SAFE_CAST": lambda self: self._parse_cast(False), 734 "STRING_AGG": lambda self: self._parse_string_agg(), 735 "SUBSTRING": lambda self: self._parse_substring(), 736 "TRIM": lambda self: self._parse_trim(), 737 "TRY_CAST": lambda self: self._parse_cast(False), 738 "TRY_CONVERT": lambda self: self._parse_convert(False), 739 } 740 741 QUERY_MODIFIER_PARSERS = { 742 TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()), 743 TokenType.WHERE: lambda self: ("where", self._parse_where()), 744 TokenType.GROUP_BY: lambda self: ("group", self._parse_group()), 745 TokenType.HAVING: lambda self: ("having", self._parse_having()), 746 TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()), 747 TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()), 748 TokenType.ORDER_BY: lambda self: ("order", self._parse_order()), 749 TokenType.LIMIT: lambda self: ("limit", self._parse_limit()), 750 TokenType.FETCH: lambda self: ("limit", self._parse_limit()), 751 TokenType.OFFSET: lambda self: ("offset", self._parse_offset()), 752 TokenType.FOR: lambda self: ("locks", self._parse_locks()), 753 TokenType.LOCK: lambda self: ("locks", self._parse_locks()), 754 TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 755 TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 756 TokenType.CLUSTER_BY: lambda self: ( 757 "cluster", 758 self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 759 ), 760 TokenType.DISTRIBUTE_BY: lambda self: ( 761 "distribute", 762 
    QUERY_MODIFIER_PARSERS = {
        TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()),
        TokenType.WHERE: lambda self: ("where", self._parse_where()),
        TokenType.GROUP_BY: lambda self: ("group", self._parse_group()),
        TokenType.HAVING: lambda self: ("having", self._parse_having()),
        TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()),
        TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()),
        TokenType.ORDER_BY: lambda self: ("order", self._parse_order()),
        TokenType.LIMIT: lambda self: ("limit", self._parse_limit()),
        TokenType.FETCH: lambda self: ("limit", self._parse_limit()),
        TokenType.OFFSET: lambda self: ("offset", self._parse_offset()),
        TokenType.FOR: lambda self: ("locks", self._parse_locks()),
        TokenType.LOCK: lambda self: ("locks", self._parse_locks()),
        TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)),
        TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)),
        TokenType.CLUSTER_BY: lambda self: (
            "cluster",
            self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
        ),
        TokenType.DISTRIBUTE_BY: lambda self: (
            "distribute",
            self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY),
        ),
        TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)),
    }

    SET_PARSERS = {
        "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"),
        "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"),
        "SESSION": lambda self: self._parse_set_item_assignment("SESSION"),
        "TRANSACTION": lambda self: self._parse_set_transaction(),
    }

    SHOW_PARSERS: t.Dict[str, t.Callable] = {}

    TYPE_LITERAL_PARSERS: t.Dict[exp.DataType.Type, t.Callable] = {}

    MODIFIABLES = (exp.Subquery, exp.Subqueryable, exp.Table)

    DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN}

    PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE}

    TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"}
    TRANSACTION_CHARACTERISTICS = {
        "ISOLATION LEVEL REPEATABLE READ",
        "ISOLATION LEVEL READ COMMITTED",
        "ISOLATION LEVEL READ UNCOMMITTED",
        "ISOLATION LEVEL SERIALIZABLE",
        "READ WRITE",
        "READ ONLY",
    }

    INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"}

    CLONE_KINDS = {"TIMESTAMP", "OFFSET", "STATEMENT"}

    TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE}

    WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS}
    WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER}
    WINDOW_SIDES = {"FOLLOWING", "PRECEDING"}

    ADD_CONSTRAINT_TOKENS = {TokenType.CONSTRAINT, TokenType.PRIMARY_KEY, TokenType.FOREIGN_KEY}

    STRICT_CAST = True

    # A NULL arg in CONCAT yields NULL by default
    CONCAT_NULL_OUTPUTS_STRING = False

    PREFIXED_PIVOT_COLUMNS = False
    IDENTIFY_PIVOT_STRINGS = False

    LOG_BASE_FIRST = True
    LOG_DEFAULTS_TO_LN = False

    __slots__ = (
        "error_level",
        "error_message_context",
        "max_errors",
        "sql",
        "errors",
        "_tokens",
        "_index",
        "_curr",
        "_next",
        "_prev",
        "_prev_comments",
    )

    # Autofilled
    INDEX_OFFSET: int = 0
    UNNEST_COLUMN_ONLY: bool = False
    ALIAS_POST_TABLESAMPLE: bool = False
    STRICT_STRING_CONCAT = False
    NULL_ORDERING: str = "nulls_are_small"
    SHOW_TRIE: t.Dict = {}
    SET_TRIE: t.Dict = {}
    FORMAT_MAPPING: t.Dict[str, str] = {}
    FORMAT_TRIE: t.Dict = {}
    TIME_MAPPING: t.Dict[str, str] = {}
    TIME_TRIE: t.Dict = {}

    def __init__(
        self,
        error_level: t.Optional[ErrorLevel] = None,
        error_message_context: int = 100,
        max_errors: int = 3,
    ):
        self.error_level = error_level or ErrorLevel.IMMEDIATE
        self.error_message_context = error_message_context
        self.max_errors = max_errors
        self.reset()

    def reset(self):
        self.sql = ""
        self.errors = []
        self._tokens = []
        self._index = 0
        self._curr = None
        self._next = None
        self._prev = None
        self._prev_comments = None
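    # Minimal usage sketch for the constructor settings above (illustrative):
    #
    #     from sqlglot.errors import ErrorLevel
    #
    #     parser = Parser(error_level=ErrorLevel.RAISE, max_errors=5)
    #     parser.reset()  # clears sql, errors and the token cursor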
878 """ 879 return self._parse( 880 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 881 ) 882 883 def parse_into( 884 self, 885 expression_types: exp.IntoType, 886 raw_tokens: t.List[Token], 887 sql: t.Optional[str] = None, 888 ) -> t.List[t.Optional[exp.Expression]]: 889 """ 890 Parses a list of tokens into a given Expression type. If a collection of Expression 891 types is given instead, this method will try to parse the token list into each one 892 of them, stopping at the first for which the parsing succeeds. 893 894 Args: 895 expression_types: The expression type(s) to try and parse the token list into. 896 raw_tokens: The list of tokens. 897 sql: The original SQL string, used to produce helpful debug messages. 898 899 Returns: 900 The target Expression. 901 """ 902 errors = [] 903 for expression_type in ensure_list(expression_types): 904 parser = self.EXPRESSION_PARSERS.get(expression_type) 905 if not parser: 906 raise TypeError(f"No parser registered for {expression_type}") 907 908 try: 909 return self._parse(parser, raw_tokens, sql) 910 except ParseError as e: 911 e.errors[0]["into_expression"] = expression_type 912 errors.append(e) 913 914 raise ParseError( 915 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 916 errors=merge_errors(errors), 917 ) from errors[-1] 918 919 def _parse( 920 self, 921 parse_method: t.Callable[[Parser], t.Optional[exp.Expression]], 922 raw_tokens: t.List[Token], 923 sql: t.Optional[str] = None, 924 ) -> t.List[t.Optional[exp.Expression]]: 925 self.reset() 926 self.sql = sql or "" 927 928 total = len(raw_tokens) 929 chunks: t.List[t.List[Token]] = [[]] 930 931 for i, token in enumerate(raw_tokens): 932 if token.token_type == TokenType.SEMICOLON: 933 if i < total - 1: 934 chunks.append([]) 935 else: 936 chunks[-1].append(token) 937 938 expressions = [] 939 940 for tokens in chunks: 941 self._index = -1 942 self._tokens = tokens 943 self._advance() 944 945 expressions.append(parse_method(self)) 946 947 if self._index < len(self._tokens): 948 self.raise_error("Invalid expression / Unexpected token") 949 950 self.check_errors() 951 952 return expressions 953 954 def check_errors(self) -> None: 955 """Logs or raises any found errors, depending on the chosen error level setting.""" 956 if self.error_level == ErrorLevel.WARN: 957 for error in self.errors: 958 logger.error(str(error)) 959 elif self.error_level == ErrorLevel.RAISE and self.errors: 960 raise ParseError( 961 concat_messages(self.errors, self.max_errors), 962 errors=merge_errors(self.errors), 963 ) 964 965 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 966 """ 967 Appends an error in the list of recorded errors or raises it, depending on the chosen 968 error level setting. 969 """ 970 token = token or self._curr or self._prev or Token.string("") 971 start = token.start 972 end = token.end + 1 973 start_context = self.sql[max(start - self.error_message_context, 0) : start] 974 highlight = self.sql[start:end] 975 end_context = self.sql[end : end + self.error_message_context] 976 977 error = ParseError.new( 978 f"{message}. 
Line {token.line}, Col: {token.col}.\n" 979 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 980 description=message, 981 line=token.line, 982 col=token.col, 983 start_context=start_context, 984 highlight=highlight, 985 end_context=end_context, 986 ) 987 988 if self.error_level == ErrorLevel.IMMEDIATE: 989 raise error 990 991 self.errors.append(error) 992 993 def expression( 994 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 995 ) -> E: 996 """ 997 Creates a new, validated Expression. 998 999 Args: 1000 exp_class: The expression class to instantiate. 1001 comments: An optional list of comments to attach to the expression. 1002 kwargs: The arguments to set for the expression along with their respective values. 1003 1004 Returns: 1005 The target expression. 1006 """ 1007 instance = exp_class(**kwargs) 1008 instance.add_comments(comments) if comments else self._add_comments(instance) 1009 return self.validate_expression(instance) 1010 1011 def _add_comments(self, expression: t.Optional[exp.Expression]) -> None: 1012 if expression and self._prev_comments: 1013 expression.add_comments(self._prev_comments) 1014 self._prev_comments = None 1015 1016 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1017 """ 1018 Validates an Expression, making sure that all its mandatory arguments are set. 1019 1020 Args: 1021 expression: The expression to validate. 1022 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1023 1024 Returns: 1025 The validated expression. 1026 """ 1027 if self.error_level != ErrorLevel.IGNORE: 1028 for error_message in expression.error_messages(args): 1029 self.raise_error(error_message) 1030 1031 return expression 1032 1033 def _find_sql(self, start: Token, end: Token) -> str: 1034 return self.sql[start.start : end.end + 1] 1035 1036 def _advance(self, times: int = 1) -> None: 1037 self._index += times 1038 self._curr = seq_get(self._tokens, self._index) 1039 self._next = seq_get(self._tokens, self._index + 1) 1040 1041 if self._index > 0: 1042 self._prev = self._tokens[self._index - 1] 1043 self._prev_comments = self._prev.comments 1044 else: 1045 self._prev = None 1046 self._prev_comments = None 1047 1048 def _retreat(self, index: int) -> None: 1049 if index != self._index: 1050 self._advance(index - self._index) 1051 1052 def _parse_command(self) -> exp.Command: 1053 return self.expression(exp.Command, this=self._prev.text, expression=self._parse_string()) 1054 1055 def _parse_comment(self, allow_exists: bool = True) -> exp.Expression: 1056 start = self._prev 1057 exists = self._parse_exists() if allow_exists else None 1058 1059 self._match(TokenType.ON) 1060 1061 kind = self._match_set(self.CREATABLES) and self._prev 1062 if not kind: 1063 return self._parse_as_command(start) 1064 1065 if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1066 this = self._parse_user_defined_function(kind=kind.token_type) 1067 elif kind.token_type == TokenType.TABLE: 1068 this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS) 1069 elif kind.token_type == TokenType.COLUMN: 1070 this = self._parse_column() 1071 else: 1072 this = self._parse_id_var() 1073 1074 self._match(TokenType.IS) 1075 1076 return self.expression( 1077 exp.Comment, this=this, kind=kind.text, expression=self._parse_string(), exists=exists 1078 ) 1079 1080 def _parse_to_table( 1081 self, 1082 ) -> exp.ToTableProperty: 1083 table = self._parse_table_parts(schema=True) 1084 return 
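    # End-to-end sketch of the two entry points above (illustrative):
    #
    #     from sqlglot.tokens import Tokenizer
    #
    #     sql = "SELECT a FROM b; SELECT 1"
    #     trees = Parser().parse(Tokenizer().tokenize(sql), sql=sql)
    #     assert len(trees) == 2  # one syntax tree per semicolon-separated statement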
    def _parse(
        self,
        parse_method: t.Callable[[Parser], t.Optional[exp.Expression]],
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        self.reset()
        self.sql = sql or ""

        total = len(raw_tokens)
        chunks: t.List[t.List[Token]] = [[]]

        for i, token in enumerate(raw_tokens):
            if token.token_type == TokenType.SEMICOLON:
                if i < total - 1:
                    chunks.append([])
            else:
                chunks[-1].append(token)

        expressions = []

        for tokens in chunks:
            self._index = -1
            self._tokens = tokens
            self._advance()

            expressions.append(parse_method(self))

            if self._index < len(self._tokens):
                self.raise_error("Invalid expression / Unexpected token")

            self.check_errors()

        return expressions

    def check_errors(self) -> None:
        """Logs or raises any found errors, depending on the chosen error level setting."""
        if self.error_level == ErrorLevel.WARN:
            for error in self.errors:
                logger.error(str(error))
        elif self.error_level == ErrorLevel.RAISE and self.errors:
            raise ParseError(
                concat_messages(self.errors, self.max_errors),
                errors=merge_errors(self.errors),
            )

    def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
        """
        Appends an error to the list of recorded errors or raises it, depending on the chosen
        error level setting.
        """
        token = token or self._curr or self._prev or Token.string("")
        start = token.start
        end = token.end + 1
        start_context = self.sql[max(start - self.error_message_context, 0) : start]
        highlight = self.sql[start:end]
        end_context = self.sql[end : end + self.error_message_context]

        error = ParseError.new(
            f"{message}. Line {token.line}, Col: {token.col}.\n"
            f"  {start_context}\033[4m{highlight}\033[0m{end_context}",
            description=message,
            line=token.line,
            col=token.col,
            start_context=start_context,
            highlight=highlight,
            end_context=end_context,
        )

        if self.error_level == ErrorLevel.IMMEDIATE:
            raise error

        self.errors.append(error)

    def expression(
        self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs
    ) -> E:
        """
        Creates a new, validated Expression.

        Args:
            exp_class: The expression class to instantiate.
            comments: An optional list of comments to attach to the expression.
            kwargs: The arguments to set for the expression along with their respective values.

        Returns:
            The target expression.
        """
        instance = exp_class(**kwargs)
        instance.add_comments(comments) if comments else self._add_comments(instance)
        return self.validate_expression(instance)

    def _add_comments(self, expression: t.Optional[exp.Expression]) -> None:
        if expression and self._prev_comments:
            expression.add_comments(self._prev_comments)
            self._prev_comments = None

    def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E:
        """
        Validates an Expression, making sure that all its mandatory arguments are set.

        Args:
            expression: The expression to validate.
            args: An optional list of items that was used to instantiate the expression, if it's a Func.

        Returns:
            The validated expression.
        """
        if self.error_level != ErrorLevel.IGNORE:
            for error_message in expression.error_messages(args):
                self.raise_error(error_message)

        return expression

    def _find_sql(self, start: Token, end: Token) -> str:
        return self.sql[start.start : end.end + 1]

    def _advance(self, times: int = 1) -> None:
        self._index += times
        self._curr = seq_get(self._tokens, self._index)
        self._next = seq_get(self._tokens, self._index + 1)

        if self._index > 0:
            self._prev = self._tokens[self._index - 1]
            self._prev_comments = self._prev.comments
        else:
            self._prev = None
            self._prev_comments = None

    def _retreat(self, index: int) -> None:
        if index != self._index:
            self._advance(index - self._index)

    def _parse_command(self) -> exp.Command:
        return self.expression(exp.Command, this=self._prev.text, expression=self._parse_string())

    def _parse_comment(self, allow_exists: bool = True) -> exp.Expression:
        start = self._prev
        exists = self._parse_exists() if allow_exists else None

        self._match(TokenType.ON)

        kind = self._match_set(self.CREATABLES) and self._prev
        if not kind:
            return self._parse_as_command(start)

        if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=kind.token_type)
        elif kind.token_type == TokenType.TABLE:
            this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS)
        elif kind.token_type == TokenType.COLUMN:
            this = self._parse_column()
        else:
            this = self._parse_id_var()

        self._match(TokenType.IS)

        return self.expression(
            exp.Comment, this=this, kind=kind.text, expression=self._parse_string(), exists=exists
        )

    def _parse_to_table(
        self,
    ) -> exp.ToTableProperty:
        table = self._parse_table_parts(schema=True)
        return self.expression(exp.ToTableProperty, this=table)

    # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl
    def _parse_ttl(self) -> exp.Expression:
        def _parse_ttl_action() -> t.Optional[exp.Expression]:
            this = self._parse_bitwise()

            if self._match_text_seq("DELETE"):
                return self.expression(exp.MergeTreeTTLAction, this=this, delete=True)
            if self._match_text_seq("RECOMPRESS"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise()
                )
            if self._match_text_seq("TO", "DISK"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string()
                )
            if self._match_text_seq("TO", "VOLUME"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string()
                )

            return this

        expressions = self._parse_csv(_parse_ttl_action)
        where = self._parse_where()
        group = self._parse_group()

        aggregates = None
        if group and self._match(TokenType.SET):
            aggregates = self._parse_csv(self._parse_set_item)

        return self.expression(
            exp.MergeTreeTTL,
            expressions=expressions,
            where=where,
            group=group,
            aggregates=aggregates,
        )

    def _parse_statement(self) -> t.Optional[exp.Expression]:
        if self._curr is None:
            return None

        if self._match_set(self.STATEMENT_PARSERS):
            return self.STATEMENT_PARSERS[self._prev.token_type](self)

        if self._match_set(Tokenizer.COMMANDS):
            return self._parse_command()

        expression = self._parse_expression()
        expression = self._parse_set_operations(expression) if expression else self._parse_select()
        return self._parse_query_modifiers(expression)

    def _parse_drop(self) -> exp.Drop | exp.Command:
        start = self._prev
        temporary = self._match(TokenType.TEMPORARY)
        materialized = self._match_text_seq("MATERIALIZED")

        kind = self._match_set(self.CREATABLES) and self._prev.text
        if not kind:
            return self._parse_as_command(start)

        return self.expression(
            exp.Drop,
            comments=start.comments,
            exists=self._parse_exists(),
            this=self._parse_table(schema=True),
            kind=kind,
            temporary=temporary,
            materialized=materialized,
            cascade=self._match_text_seq("CASCADE"),
            constraints=self._match_text_seq("CONSTRAINTS"),
            purge=self._match_text_seq("PURGE"),
        )

    def _parse_exists(self, not_: bool = False) -> t.Optional[bool]:
        return (
            self._match(TokenType.IF)
            and (not not_ or self._match(TokenType.NOT))
            and self._match(TokenType.EXISTS)
        )
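    # Hedged example of the DROP path, assuming the default dialect:
    #
    #     import sqlglot
    #     from sqlglot import exp
    #
    #     drop = sqlglot.parse_one("DROP TABLE IF EXISTS db.t")
    #     assert isinstance(drop, exp.Drop)
    #     assert drop.args["exists"] is True  # set by _parse_exists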
    def _parse_create(self) -> exp.Create | exp.Command:
        # Note: this can't be None because we've matched a statement parser
        start = self._prev
        replace = start.text.upper() == "REPLACE" or self._match_pair(
            TokenType.OR, TokenType.REPLACE
        )
        unique = self._match(TokenType.UNIQUE)

        if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False):
            self._advance()

        properties = None
        create_token = self._match_set(self.CREATABLES) and self._prev

        if not create_token:
            # exp.Properties.Location.POST_CREATE
            properties = self._parse_properties()
            create_token = self._match_set(self.CREATABLES) and self._prev

            if not properties or not create_token:
                return self._parse_as_command(start)

        exists = self._parse_exists(not_=True)
        this = None
        expression = None
        indexes = None
        no_schema_binding = None
        begin = None
        clone = None

        def extend_props(temp_props: t.Optional[exp.Properties]) -> None:
            nonlocal properties
            if properties and temp_props:
                properties.expressions.extend(temp_props.expressions)
            elif temp_props:
                properties = temp_props

        if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=create_token.token_type)

            # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature)
            extend_props(self._parse_properties())

            self._match(TokenType.ALIAS)
            begin = self._match(TokenType.BEGIN)
            return_ = self._match_text_seq("RETURN")
            expression = self._parse_statement()

            if return_:
                expression = self.expression(exp.Return, this=expression)
        elif create_token.token_type == TokenType.INDEX:
            this = self._parse_index(index=self._parse_id_var())
        elif create_token.token_type in self.DB_CREATABLES:
            table_parts = self._parse_table_parts(schema=True)

            # exp.Properties.Location.POST_NAME
            self._match(TokenType.COMMA)
            extend_props(self._parse_properties(before=True))

            this = self._parse_schema(this=table_parts)

            # exp.Properties.Location.POST_SCHEMA and POST_WITH
            extend_props(self._parse_properties())

            self._match(TokenType.ALIAS)
            if not self._match_set(self.DDL_SELECT_TOKENS, advance=False):
                # exp.Properties.Location.POST_ALIAS
                extend_props(self._parse_properties())

            expression = self._parse_ddl_select()

            if create_token.token_type == TokenType.TABLE:
                # exp.Properties.Location.POST_EXPRESSION
                extend_props(self._parse_properties())

                indexes = []
                while True:
                    index = self._parse_index()

                    # exp.Properties.Location.POST_INDEX
                    extend_props(self._parse_properties())

                    if not index:
                        break
                    else:
                        self._match(TokenType.COMMA)
                        indexes.append(index)
            elif create_token.token_type == TokenType.VIEW:
                if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"):
                    no_schema_binding = True

            if self._match_text_seq("CLONE"):
                clone = self._parse_table(schema=True)
                when = self._match_texts({"AT", "BEFORE"}) and self._prev.text.upper()
                clone_kind = (
                    self._match(TokenType.L_PAREN)
                    and self._match_texts(self.CLONE_KINDS)
                    and self._prev.text.upper()
                )
                clone_expression = self._match(TokenType.FARROW) and self._parse_bitwise()
                self._match(TokenType.R_PAREN)
                clone = self.expression(
                    exp.Clone, this=clone, when=when, kind=clone_kind, expression=clone_expression
                )

        return self.expression(
            exp.Create,
            this=this,
            kind=create_token.text,
            replace=replace,
            unique=unique,
            expression=expression,
            exists=exists,
            properties=properties,
            indexes=indexes,
            no_schema_binding=no_schema_binding,
            begin=begin,
            clone=clone,
        )
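    # Illustrative example for _parse_create:
    #
    #     import sqlglot
    #     from sqlglot import exp
    #
    #     create = sqlglot.parse_one("CREATE OR REPLACE VIEW v AS SELECT 1")
    #     assert isinstance(create, exp.Create)
    #     assert create.args["kind"] == "VIEW" and create.args["replace"] is True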
(self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"), 1298 "after": self._match_text_seq("AFTER"), 1299 "minimum": self._match_texts(("MIN", "MINIMUM")), 1300 "maximum": self._match_texts(("MAX", "MAXIMUM")), 1301 } 1302 1303 if self._match_texts(self.PROPERTY_PARSERS): 1304 parser = self.PROPERTY_PARSERS[self._prev.text.upper()] 1305 try: 1306 return parser(self, **{k: v for k, v in kwargs.items() if v}) 1307 except TypeError: 1308 self.raise_error(f"Cannot parse property '{self._prev.text}'") 1309 1310 return None 1311 1312 def _parse_property(self) -> t.Optional[exp.Expression]: 1313 if self._match_texts(self.PROPERTY_PARSERS): 1314 return self.PROPERTY_PARSERS[self._prev.text.upper()](self) 1315 1316 if self._match_pair(TokenType.DEFAULT, TokenType.CHARACTER_SET): 1317 return self._parse_character_set(default=True) 1318 1319 if self._match_text_seq("COMPOUND", "SORTKEY"): 1320 return self._parse_sortkey(compound=True) 1321 1322 if self._match_text_seq("SQL", "SECURITY"): 1323 return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER")) 1324 1325 assignment = self._match_pair( 1326 TokenType.VAR, TokenType.EQ, advance=False 1327 ) or self._match_pair(TokenType.STRING, TokenType.EQ, advance=False) 1328 1329 if assignment: 1330 key = self._parse_var_or_string() 1331 self._match(TokenType.EQ) 1332 return self.expression(exp.Property, this=key, value=self._parse_column()) 1333 1334 return None 1335 1336 def _parse_stored(self) -> exp.FileFormatProperty: 1337 self._match(TokenType.ALIAS) 1338 1339 input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None 1340 output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None 1341 1342 return self.expression( 1343 exp.FileFormatProperty, 1344 this=self.expression( 1345 exp.InputOutputFormat, input_format=input_format, output_format=output_format 1346 ) 1347 if input_format or output_format 1348 else self._parse_var_or_string() or self._parse_number() or self._parse_id_var(), 1349 ) 1350 1351 def _parse_property_assignment(self, exp_class: t.Type[E]) -> E: 1352 self._match(TokenType.EQ) 1353 self._match(TokenType.ALIAS) 1354 return self.expression(exp_class, this=self._parse_field()) 1355 1356 def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]: 1357 properties = [] 1358 while True: 1359 if before: 1360 prop = self._parse_property_before() 1361 else: 1362 prop = self._parse_property() 1363 1364 if not prop: 1365 break 1366 for p in ensure_list(prop): 1367 properties.append(p) 1368 1369 if properties: 1370 return self.expression(exp.Properties, expressions=properties) 1371 1372 return None 1373 1374 def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty: 1375 return self.expression( 1376 exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION") 1377 ) 1378 1379 def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty: 1380 if self._index >= 2: 1381 pre_volatile_token = self._tokens[self._index - 2] 1382 else: 1383 pre_volatile_token = None 1384 1385 if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS: 1386 return exp.VolatileProperty() 1387 1388 return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE")) 1389 1390 def _parse_with_property( 1391 self, 1392 ) -> t.Optional[exp.Expression] | t.List[t.Optional[exp.Expression]]: 1393 if self._match(TokenType.L_PAREN, advance=False): 1394 return 
    def _parse_with_property(
        self,
    ) -> t.Optional[exp.Expression] | t.List[t.Optional[exp.Expression]]:
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_property)

        if self._match_text_seq("JOURNAL"):
            return self._parse_withjournaltable()

        if self._match_text_seq("DATA"):
            return self._parse_withdata(no=False)
        elif self._match_text_seq("NO", "DATA"):
            return self._parse_withdata(no=True)

        if not self._next:
            return None

        return self._parse_withisolatedloading()

    # https://dev.mysql.com/doc/refman/8.0/en/create-view.html
    def _parse_definer(self) -> t.Optional[exp.DefinerProperty]:
        self._match(TokenType.EQ)

        user = self._parse_id_var()
        self._match(TokenType.PARAMETER)
        host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text)

        if not user or not host:
            return None

        return exp.DefinerProperty(this=f"{user}@{host}")

    def _parse_withjournaltable(self) -> exp.WithJournalTableProperty:
        self._match(TokenType.TABLE)
        self._match(TokenType.EQ)
        return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts())

    def _parse_log(self, no: bool = False) -> exp.LogProperty:
        return self.expression(exp.LogProperty, no=no)

    def _parse_journal(self, **kwargs) -> exp.JournalProperty:
        return self.expression(exp.JournalProperty, **kwargs)

    def _parse_checksum(self) -> exp.ChecksumProperty:
        self._match(TokenType.EQ)

        on = None
        if self._match(TokenType.ON):
            on = True
        elif self._match_text_seq("OFF"):
            on = False

        return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT))

    def _parse_cluster(self) -> exp.Cluster:
        return self.expression(exp.Cluster, expressions=self._parse_csv(self._parse_ordered))

    def _parse_clustered_by(self) -> exp.ClusteredByProperty:
        self._match_text_seq("BY")

        self._match_l_paren()
        expressions = self._parse_csv(self._parse_column)
        self._match_r_paren()

        if self._match_text_seq("SORTED", "BY"):
            self._match_l_paren()
            sorted_by = self._parse_csv(self._parse_ordered)
            self._match_r_paren()
        else:
            sorted_by = None

        self._match(TokenType.INTO)
        buckets = self._parse_number()
        self._match_text_seq("BUCKETS")

        return self.expression(
            exp.ClusteredByProperty,
            expressions=expressions,
            sorted_by=sorted_by,
            buckets=buckets,
        )

    def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]:
        if not self._match_text_seq("GRANTS"):
            self._retreat(self._index - 1)
            return None

        return self.expression(exp.CopyGrantsProperty)

    def _parse_freespace(self) -> exp.FreespaceProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT)
        )

    def _parse_mergeblockratio(
        self, no: bool = False, default: bool = False
    ) -> exp.MergeBlockRatioProperty:
        if self._match(TokenType.EQ):
            return self.expression(
                exp.MergeBlockRatioProperty,
                this=self._parse_number(),
                percent=self._match(TokenType.PERCENT),
            )

        return self.expression(exp.MergeBlockRatioProperty, no=no, default=default)
    def _parse_datablocksize(
        self,
        default: t.Optional[bool] = None,
        minimum: t.Optional[bool] = None,
        maximum: t.Optional[bool] = None,
    ) -> exp.DataBlocksizeProperty:
        self._match(TokenType.EQ)
        size = self._parse_number()

        units = None
        if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")):
            units = self._prev.text

        return self.expression(
            exp.DataBlocksizeProperty,
            size=size,
            units=units,
            default=default,
            minimum=minimum,
            maximum=maximum,
        )

    def _parse_blockcompression(self) -> exp.BlockCompressionProperty:
        self._match(TokenType.EQ)
        always = self._match_text_seq("ALWAYS")
        manual = self._match_text_seq("MANUAL")
        never = self._match_text_seq("NEVER")
        default = self._match_text_seq("DEFAULT")

        autotemp = None
        if self._match_text_seq("AUTOTEMP"):
            autotemp = self._parse_schema()

        return self.expression(
            exp.BlockCompressionProperty,
            always=always,
            manual=manual,
            never=never,
            default=default,
            autotemp=autotemp,
        )

    def _parse_withisolatedloading(self) -> exp.IsolatedLoadingProperty:
        no = self._match_text_seq("NO")
        concurrent = self._match_text_seq("CONCURRENT")
        self._match_text_seq("ISOLATED", "LOADING")
        for_all = self._match_text_seq("FOR", "ALL")
        for_insert = self._match_text_seq("FOR", "INSERT")
        for_none = self._match_text_seq("FOR", "NONE")
        return self.expression(
            exp.IsolatedLoadingProperty,
            no=no,
            concurrent=concurrent,
            for_all=for_all,
            for_insert=for_insert,
            for_none=for_none,
        )

    def _parse_locking(self) -> exp.LockingProperty:
        if self._match(TokenType.TABLE):
            kind = "TABLE"
        elif self._match(TokenType.VIEW):
            kind = "VIEW"
        elif self._match(TokenType.ROW):
            kind = "ROW"
        elif self._match_text_seq("DATABASE"):
            kind = "DATABASE"
        else:
            kind = None

        if kind in ("DATABASE", "TABLE", "VIEW"):
            this = self._parse_table_parts()
        else:
            this = None

        if self._match(TokenType.FOR):
            for_or_in = "FOR"
        elif self._match(TokenType.IN):
            for_or_in = "IN"
        else:
            for_or_in = None

        if self._match_text_seq("ACCESS"):
            lock_type = "ACCESS"
        elif self._match_texts(("EXCL", "EXCLUSIVE")):
            lock_type = "EXCLUSIVE"
        elif self._match_text_seq("SHARE"):
            lock_type = "SHARE"
        elif self._match_text_seq("READ"):
            lock_type = "READ"
        elif self._match_text_seq("WRITE"):
            lock_type = "WRITE"
        elif self._match_text_seq("CHECKSUM"):
            lock_type = "CHECKSUM"
        else:
            lock_type = None

        override = self._match_text_seq("OVERRIDE")

        return self.expression(
            exp.LockingProperty,
            this=this,
            kind=kind,
            for_or_in=for_or_in,
            lock_type=lock_type,
            override=override,
        )

    def _parse_partition_by(self) -> t.List[t.Optional[exp.Expression]]:
        if self._match(TokenType.PARTITION_BY):
            return self._parse_csv(self._parse_conjunction)
        return []

    def _parse_partitioned_by(self) -> exp.PartitionedByProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.PartitionedByProperty,
            this=self._parse_schema() or self._parse_bracket(self._parse_field()),
        )

    def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty:
        if self._match_text_seq("AND", "STATISTICS"):
            statistics = True
        elif self._match_text_seq("AND", "NO", "STATISTICS"):
            statistics = False
        else:
            statistics = None

        return self.expression(exp.WithDataProperty, no=no, statistics=statistics)
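    # Illustrative (assuming the bundled Hive dialect): PARTITIONED BY goes through
    # _parse_partitioned_by:
    #
    #     import sqlglot
    #     from sqlglot import exp
    #
    #     sql = "CREATE TABLE t (a INT) PARTITIONED BY (ds STRING)"
    #     assert sqlglot.parse_one(sql, read="hive").find(exp.PartitionedByProperty) is not None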
    def _parse_no_property(self) -> t.Optional[exp.NoPrimaryIndexProperty]:
        if self._match_text_seq("PRIMARY", "INDEX"):
            return exp.NoPrimaryIndexProperty()
        return None

    def _parse_on_property(self) -> t.Optional[exp.Expression]:
        if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"):
            return exp.OnCommitProperty()
        elif self._match_text_seq("COMMIT", "DELETE", "ROWS"):
            return exp.OnCommitProperty(delete=True)
        return None

    def _parse_distkey(self) -> exp.DistKeyProperty:
        return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var))

    def _parse_create_like(self) -> t.Optional[exp.LikeProperty]:
        table = self._parse_table(schema=True)

        options = []
        while self._match_texts(("INCLUDING", "EXCLUDING")):
            this = self._prev.text.upper()

            id_var = self._parse_id_var()
            if not id_var:
                return None

            options.append(
                self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper()))
            )

        return self.expression(exp.LikeProperty, this=table, expressions=options)

    def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty:
        return self.expression(
            exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound
        )

    def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default
        )

    def _parse_returns(self) -> exp.ReturnsProperty:
        value: t.Optional[exp.Expression]
        is_table = self._match(TokenType.TABLE)

        if is_table:
            if self._match(TokenType.LT):
                value = self.expression(
                    exp.Schema,
                    this="TABLE",
                    expressions=self._parse_csv(self._parse_struct_types),
                )
                if not self._match(TokenType.GT):
                    self.raise_error("Expecting >")
            else:
                value = self._parse_schema(exp.var("TABLE"))
        else:
            value = self._parse_types()

        return self.expression(exp.ReturnsProperty, this=value, is_table=is_table)

    def _parse_describe(self) -> exp.Describe:
        kind = self._match_set(self.CREATABLES) and self._prev.text
        this = self._parse_table()
        return self.expression(exp.Describe, this=this, kind=kind)
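    # Hedged sketch: CREATE TABLE ... LIKE is handled by _parse_create_like:
    #
    #     import sqlglot
    #     from sqlglot import exp
    #
    #     create = sqlglot.parse_one("CREATE TABLE t1 LIKE t2")
    #     assert create.find(exp.LikeProperty) is not None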
    def _parse_insert(self) -> exp.Insert:
        comments = ensure_list(self._prev_comments)
        overwrite = self._match(TokenType.OVERWRITE)
        ignore = self._match(TokenType.IGNORE)
        local = self._match_text_seq("LOCAL")
        alternative = None

        if self._match_text_seq("DIRECTORY"):
            this: t.Optional[exp.Expression] = self.expression(
                exp.Directory,
                this=self._parse_var_or_string(),
                local=local,
                row_format=self._parse_row_format(match_row=True),
            )
        else:
            if self._match(TokenType.OR):
                alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text

            self._match(TokenType.INTO)
            comments += ensure_list(self._prev_comments)
            self._match(TokenType.TABLE)
            this = self._parse_table(schema=True)

        returning = self._parse_returning()

        return self.expression(
            exp.Insert,
            comments=comments,
            this=this,
            exists=self._parse_exists(),
            partition=self._parse_partition(),
            where=self._match_pair(TokenType.REPLACE, TokenType.WHERE)
            and self._parse_conjunction(),
            expression=self._parse_ddl_select(),
            conflict=self._parse_on_conflict(),
            returning=returning or self._parse_returning(),
            overwrite=overwrite,
            alternative=alternative,
            ignore=ignore,
        )

    def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]:
        conflict = self._match_text_seq("ON", "CONFLICT")
        duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY")

        if not conflict and not duplicate:
            return None

        nothing = None
        expressions = None
        key = None
        constraint = None

        if conflict:
            if self._match_text_seq("ON", "CONSTRAINT"):
                constraint = self._parse_id_var()
            else:
                key = self._parse_csv(self._parse_value)

        self._match_text_seq("DO")
        if self._match_text_seq("NOTHING"):
            nothing = True
        else:
            self._match(TokenType.UPDATE)
            self._match(TokenType.SET)
            expressions = self._parse_csv(self._parse_equality)

        return self.expression(
            exp.OnConflict,
            duplicate=duplicate,
            expressions=expressions,
            nothing=nothing,
            key=key,
            constraint=constraint,
        )

    def _parse_returning(self) -> t.Optional[exp.Returning]:
        if not self._match(TokenType.RETURNING):
            return None
        return self.expression(
            exp.Returning,
            expressions=self._parse_csv(self._parse_expression),
            into=self._match(TokenType.INTO) and self._parse_table_part(),
        )
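    # Illustrative: the upsert clause is parsed by _parse_on_conflict, assuming
    # Postgres-style syntax that the base parser understands:
    #
    #     import sqlglot
    #     from sqlglot import exp
    #
    #     ins = sqlglot.parse_one("INSERT INTO t VALUES (1) ON CONFLICT (a) DO NOTHING")
    #     conflict = ins.args["conflict"]
    #     assert isinstance(conflict, exp.OnConflict) and conflict.args["nothing"] is True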
    def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]:
        if not self._match(TokenType.FORMAT):
            return None
        return self._parse_row_format()

    def _parse_row_format(
        self, match_row: bool = False
    ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]:
        if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT):
            return None

        if self._match_text_seq("SERDE"):
            this = self._parse_string()

            serde_properties = None
            if self._match(TokenType.SERDE_PROPERTIES):
                serde_properties = self.expression(
                    exp.SerdeProperties, expressions=self._parse_wrapped_csv(self._parse_property)
                )

            return self.expression(
                exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties
            )

        self._match_text_seq("DELIMITED")

        kwargs = {}

        if self._match_text_seq("FIELDS", "TERMINATED", "BY"):
            kwargs["fields"] = self._parse_string()
            if self._match_text_seq("ESCAPED", "BY"):
                kwargs["escaped"] = self._parse_string()
        if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"):
            kwargs["collection_items"] = self._parse_string()
        if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"):
            kwargs["map_keys"] = self._parse_string()
        if self._match_text_seq("LINES", "TERMINATED", "BY"):
            kwargs["lines"] = self._parse_string()
        if self._match_text_seq("NULL", "DEFINED", "AS"):
            kwargs["null"] = self._parse_string()

        return self.expression(exp.RowFormatDelimitedProperty, **kwargs)  # type: ignore

    def _parse_load(self) -> exp.LoadData | exp.Command:
        if self._match_text_seq("DATA"):
            local = self._match_text_seq("LOCAL")
            self._match_text_seq("INPATH")
            inpath = self._parse_string()
            overwrite = self._match(TokenType.OVERWRITE)
            self._match_pair(TokenType.INTO, TokenType.TABLE)

            return self.expression(
                exp.LoadData,
                this=self._parse_table(schema=True),
                local=local,
                overwrite=overwrite,
                inpath=inpath,
                partition=self._parse_partition(),
                input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(),
                serde=self._match_text_seq("SERDE") and self._parse_string(),
            )
        return self._parse_as_command(self._prev)

    def _parse_delete(self) -> exp.Delete:
        # This handles MySQL's "Multiple-Table Syntax"
        # https://dev.mysql.com/doc/refman/8.0/en/delete.html
        tables = None
        comments = self._prev_comments
        if not self._match(TokenType.FROM, advance=False):
            tables = self._parse_csv(self._parse_table) or None

        returning = self._parse_returning()

        return self.expression(
            exp.Delete,
            comments=comments,
            tables=tables,
            this=self._match(TokenType.FROM) and self._parse_table(joins=True),
            using=self._match(TokenType.USING) and self._parse_table(joins=True),
            where=self._parse_where(),
            returning=returning or self._parse_returning(),
            limit=self._parse_limit(),
        )

    def _parse_update(self) -> exp.Update:
        comments = self._prev_comments
        this = self._parse_table(alias_tokens=self.UPDATE_ALIAS_TOKENS)
        expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality)
        returning = self._parse_returning()
        return self.expression(
            exp.Update,
            comments=comments,
            **{  # type: ignore
                "this": this,
                "expressions": expressions,
                "from": self._parse_from(joins=True),
                "where": self._parse_where(),
                "returning": returning or self._parse_returning(),
                "limit": self._parse_limit(),
            },
        )
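    # Quick sketch of the DML paths above (illustrative):
    #
    #     import sqlglot
    #     from sqlglot import exp
    #
    #     delete = sqlglot.parse_one("DELETE FROM t WHERE a = 1")
    #     assert isinstance(delete, exp.Delete) and isinstance(delete.args["where"], exp.Where)
    #
    #     update = sqlglot.parse_one("UPDATE t SET a = 2 WHERE a = 1")
    #     assert isinstance(update, exp.Update)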
    def _parse_uncache(self) -> exp.Uncache:
        if not self._match(TokenType.TABLE):
            self.raise_error("Expecting TABLE after UNCACHE")

        return self.expression(
            exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True)
        )

    def _parse_cache(self) -> exp.Cache:
        lazy = self._match_text_seq("LAZY")
        self._match(TokenType.TABLE)
        table = self._parse_table(schema=True)

        options = []
        if self._match_text_seq("OPTIONS"):
            self._match_l_paren()
            k = self._parse_string()
            self._match(TokenType.EQ)
            v = self._parse_string()
            options = [k, v]
            self._match_r_paren()

        self._match(TokenType.ALIAS)
        return self.expression(
            exp.Cache,
            this=table,
            lazy=lazy,
            options=options,
            expression=self._parse_select(nested=True),
        )

    def _parse_partition(self) -> t.Optional[exp.Partition]:
        if not self._match(TokenType.PARTITION):
            return None

        return self.expression(
            exp.Partition, expressions=self._parse_wrapped_csv(self._parse_conjunction)
        )

    def _parse_value(self) -> exp.Tuple:
        if self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(self._parse_conjunction)
            self._match_r_paren()
            return self.expression(exp.Tuple, expressions=expressions)

        # In Presto we can have VALUES 1, 2 which results in 1 column & 2 rows.
        # https://prestodb.io/docs/current/sql/values.html
        return self.expression(exp.Tuple, expressions=[self._parse_conjunction()])

    def _parse_select(
        self, nested: bool = False, table: bool = False, parse_subquery_alias: bool = True
    ) -> t.Optional[exp.Expression]:
        cte = self._parse_with()
        if cte:
            this = self._parse_statement()

            if not this:
                self.raise_error("Failed to parse any statement following CTE")
                return cte

            if "with" in this.arg_types:
                this.set("with", cte)
            else:
                self.raise_error(f"{this.key} does not support CTE")
                this = cte
        elif self._match(TokenType.SELECT):
            comments = self._prev_comments

            hint = self._parse_hint()
            all_ = self._match(TokenType.ALL)
            distinct = self._match(TokenType.DISTINCT)

            kind = (
                self._match(TokenType.ALIAS)
                and self._match_texts(("STRUCT", "VALUE"))
                and self._prev.text
            )

            if distinct:
                distinct = self.expression(
                    exp.Distinct,
                    on=self._parse_value() if self._match(TokenType.ON) else None,
                )

            if all_ and distinct:
                self.raise_error("Cannot specify both ALL and DISTINCT after SELECT")

            limit = self._parse_limit(top=True)
            expressions = self._parse_expressions()

            this = self.expression(
                exp.Select,
                kind=kind,
                hint=hint,
                distinct=distinct,
                expressions=expressions,
                limit=limit,
            )
            this.comments = comments

            into = self._parse_into()
            if into:
                this.set("into", into)

            from_ = self._parse_from()
            if from_:
                this.set("from", from_)

            this = self._parse_query_modifiers(this)
        elif (table or nested) and self._match(TokenType.L_PAREN):
            if self._match(TokenType.PIVOT):
                this = self._parse_simplified_pivot()
            elif self._match(TokenType.FROM):
                this = exp.select("*").from_(
                    t.cast(exp.From, self._parse_from(skip_from_token=True))
                )
            else:
                this = self._parse_table() if table else self._parse_select(nested=True)
                this = self._parse_set_operations(self._parse_query_modifiers(this))

            self._match_r_paren()

            # We return early here so that the UNION isn't attached to the subquery by the
            # following call to _parse_set_operations, but instead becomes the parent node
            return self._parse_subquery(this, parse_alias=parse_subquery_alias)
        elif self._match(TokenType.VALUES):
            this = self.expression(
                exp.Values,
                expressions=self._parse_csv(self._parse_value),
                alias=self._parse_table_alias(),
            )
        else:
            this = None

        return self._parse_set_operations(this)
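    # Illustrative: _parse_select finishes by calling _parse_set_operations, so a
    # UNION becomes the parent of the two selects:
    #
    #     import sqlglot
    #     from sqlglot import exp
    #
    #     assert isinstance(sqlglot.parse_one("SELECT 1 UNION SELECT 2"), exp.Union)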
    def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]:
        if not skip_with_token and not self._match(TokenType.WITH):
            return None

        comments = self._prev_comments
        recursive = self._match(TokenType.RECURSIVE)

        expressions = []
        while True:
            expressions.append(self._parse_cte())

            if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH):
                break
            else:
                self._match(TokenType.WITH)

        return self.expression(
            exp.With, comments=comments, expressions=expressions, recursive=recursive
        )

    def _parse_cte(self) -> exp.CTE:
        alias = self._parse_table_alias()
        if not alias or not alias.this:
            self.raise_error("Expected CTE to have alias")

        self._match(TokenType.ALIAS)
        return self.expression(
            exp.CTE, this=self._parse_wrapped(self._parse_statement), alias=alias
        )

    def _parse_table_alias(
        self, alias_tokens: t.Optional[t.Collection[TokenType]] = None
    ) -> t.Optional[exp.TableAlias]:
        any_token = self._match(TokenType.ALIAS)
        alias = (
            self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
            or self._parse_string_as_identifier()
        )

        index = self._index
        if self._match(TokenType.L_PAREN):
            columns = self._parse_csv(self._parse_function_parameter)
            self._match_r_paren() if columns else self._retreat(index)
        else:
            columns = None

        if not alias and not columns:
            return None

        return self.expression(exp.TableAlias, this=alias, columns=columns)

    def _parse_subquery(
        self, this: t.Optional[exp.Expression], parse_alias: bool = True
    ) -> t.Optional[exp.Subquery]:
        if not this:
            return None

        return self.expression(
            exp.Subquery,
            this=this,
            pivots=self._parse_pivots(),
            alias=self._parse_table_alias() if parse_alias else None,
        )

    def _parse_query_modifiers(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        if isinstance(this, self.MODIFIABLES):
            for join in iter(self._parse_join, None):
                this.append("joins", join)
            for lateral in iter(self._parse_lateral, None):
                this.append("laterals", lateral)

            while True:
                if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False):
                    parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type]
                    key, expression = parser(self)

                    if expression:
                        this.set(key, expression)
                        if key == "limit":
                            offset = expression.args.pop("offset", None)
                            if offset:
                                this.set("offset", exp.Offset(expression=offset))
                        continue
                break
        return this

    def _parse_hint(self) -> t.Optional[exp.Hint]:
        if self._match(TokenType.HINT):
            hints = []
            for hint in iter(lambda: self._parse_csv(self._parse_function), []):
                hints.extend(hint)

            if not self._match_pair(TokenType.STAR, TokenType.SLASH):
                self.raise_error("Expected */ after HINT")

            return self.expression(exp.Hint, expressions=hints)

        return None

    def _parse_into(self) -> t.Optional[exp.Into]:
        if not self._match(TokenType.INTO):
            return None

        temp = self._match(TokenType.TEMPORARY)
        unlogged = self._match_text_seq("UNLOGGED")
        self._match(TokenType.TABLE)

        return self.expression(
            exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged
        )

    def _parse_from(
        self, joins: bool = False, skip_from_token: bool = False
    ) -> t.Optional[exp.From]:
        if not skip_from_token and not self._match(TokenType.FROM):
            return None

        return self.expression(
            exp.From, comments=self._prev_comments, this=self._parse_table(joins=joins)
        )
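    # Hedged example: WITH clauses are collected by _parse_with and attached to the
    # statement that follows:
    #
    #     import sqlglot
    #     from sqlglot import exp
    #
    #     select = sqlglot.parse_one("WITH c AS (SELECT 1) SELECT * FROM c")
    #     assert isinstance(select, exp.Select)
    #     assert isinstance(select.args["with"], exp.With)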
MATCH" 2155 if self._match_text_seq("SHOW", "EMPTY", "MATCHES"): 2156 text += f" SHOW EMPTY MATCHES" 2157 elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"): 2158 text += f" OMIT EMPTY MATCHES" 2159 elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"): 2160 text += f" WITH UNMATCHED ROWS" 2161 rows = exp.var(text) 2162 else: 2163 rows = None 2164 2165 if self._match_text_seq("AFTER", "MATCH", "SKIP"): 2166 text = "AFTER MATCH SKIP" 2167 if self._match_text_seq("PAST", "LAST", "ROW"): 2168 text += f" PAST LAST ROW" 2169 elif self._match_text_seq("TO", "NEXT", "ROW"): 2170 text += f" TO NEXT ROW" 2171 elif self._match_text_seq("TO", "FIRST"): 2172 text += f" TO FIRST {self._advance_any().text}" # type: ignore 2173 elif self._match_text_seq("TO", "LAST"): 2174 text += f" TO LAST {self._advance_any().text}" # type: ignore 2175 after = exp.var(text) 2176 else: 2177 after = None 2178 2179 if self._match_text_seq("PATTERN"): 2180 self._match_l_paren() 2181 2182 if not self._curr: 2183 self.raise_error("Expecting )", self._curr) 2184 2185 paren = 1 2186 start = self._curr 2187 2188 while self._curr and paren > 0: 2189 if self._curr.token_type == TokenType.L_PAREN: 2190 paren += 1 2191 if self._curr.token_type == TokenType.R_PAREN: 2192 paren -= 1 2193 2194 end = self._prev 2195 self._advance() 2196 2197 if paren > 0: 2198 self.raise_error("Expecting )", self._curr) 2199 2200 pattern = exp.var(self._find_sql(start, end)) 2201 else: 2202 pattern = None 2203 2204 define = ( 2205 self._parse_csv( 2206 lambda: self.expression( 2207 exp.Alias, 2208 alias=self._parse_id_var(any_token=True), 2209 this=self._match(TokenType.ALIAS) and self._parse_conjunction(), 2210 ) 2211 ) 2212 if self._match_text_seq("DEFINE") 2213 else None 2214 ) 2215 2216 self._match_r_paren() 2217 2218 return self.expression( 2219 exp.MatchRecognize, 2220 partition_by=partition, 2221 order=order, 2222 measures=measures, 2223 rows=rows, 2224 after=after, 2225 pattern=pattern, 2226 define=define, 2227 alias=self._parse_table_alias(), 2228 ) 2229 2230 def _parse_lateral(self) -> t.Optional[exp.Lateral]: 2231 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY) 2232 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY) 2233 2234 if outer_apply or cross_apply: 2235 this = self._parse_select(table=True) 2236 view = None 2237 outer = not cross_apply 2238 elif self._match(TokenType.LATERAL): 2239 this = self._parse_select(table=True) 2240 view = self._match(TokenType.VIEW) 2241 outer = self._match(TokenType.OUTER) 2242 else: 2243 return None 2244 2245 if not this: 2246 this = ( 2247 self._parse_unnest() 2248 or self._parse_function() 2249 or self._parse_id_var(any_token=False) 2250 ) 2251 2252 while self._match(TokenType.DOT): 2253 this = exp.Dot( 2254 this=this, 2255 expression=self._parse_function() or self._parse_id_var(any_token=False), 2256 ) 2257 2258 if view: 2259 table = self._parse_id_var(any_token=False) 2260 columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else [] 2261 table_alias: t.Optional[exp.TableAlias] = self.expression( 2262 exp.TableAlias, this=table, columns=columns 2263 ) 2264 elif isinstance(this, exp.Subquery) and this.alias: 2265 # Ensures parity between the Subquery's and the Lateral's "alias" args 2266 table_alias = this.args["alias"].copy() 2267 else: 2268 table_alias = self._parse_table_alias() 2269 2270 return self.expression(exp.Lateral, this=this, view=view, outer=outer, alias=table_alias) 2271 2272 def _parse_join_parts( 2273 self, 2274 ) -> 
    def _parse_join_parts(
        self,
    ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]:
        return (
            self._match_set(self.JOIN_METHODS) and self._prev,
            self._match_set(self.JOIN_SIDES) and self._prev,
            self._match_set(self.JOIN_KINDS) and self._prev,
        )

    def _parse_join(
        self, skip_join_token: bool = False, parse_bracket: bool = False
    ) -> t.Optional[exp.Join]:
        if self._match(TokenType.COMMA):
            return self.expression(exp.Join, this=self._parse_table())

        index = self._index
        method, side, kind = self._parse_join_parts()
        hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None
        join = self._match(TokenType.JOIN)

        if not skip_join_token and not join:
            self._retreat(index)
            kind = None
            method = None
            side = None

        outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False)
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False)

        if not skip_join_token and not join and not outer_apply and not cross_apply:
            return None

        if outer_apply:
            side = Token(TokenType.LEFT, "LEFT")

        kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)}

        if method:
            kwargs["method"] = method.text
        if side:
            kwargs["side"] = side.text
        if kind:
            kwargs["kind"] = kind.text
        if hint:
            kwargs["hint"] = hint

        if self._match(TokenType.ON):
            kwargs["on"] = self._parse_conjunction()
        elif self._match(TokenType.USING):
            kwargs["using"] = self._parse_wrapped_id_vars()
        elif not (kind and kind.token_type == TokenType.CROSS):
            index = self._index
            joins = self._parse_joins()

            if joins and self._match(TokenType.ON):
                kwargs["on"] = self._parse_conjunction()
            elif joins and self._match(TokenType.USING):
                kwargs["using"] = self._parse_wrapped_id_vars()
            else:
                joins = None
                self._retreat(index)

            kwargs["this"].set("joins", joins)

        return self.expression(exp.Join, **kwargs)
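    # Illustrative sketch (added commentary, not part of the original module):
    # a bare comma between tables parses as an implicit exp.Join, and ON/USING
    # fill the corresponding args. Indicatively:
    #
    #   import sqlglot
    #   join = sqlglot.parse_one("SELECT * FROM a JOIN b USING (id)").args["joins"][0]
    #   join.args["using"]  # a list containing the identifier `id`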
    def _parse_index(
        self,
        index: t.Optional[exp.Expression] = None,
    ) -> t.Optional[exp.Index]:
        if index:
            unique = None
            primary = None
            amp = None

            self._match(TokenType.ON)
            self._match(TokenType.TABLE)  # hive
            table = self._parse_table_parts(schema=True)
        else:
            unique = self._match(TokenType.UNIQUE)
            primary = self._match_text_seq("PRIMARY")
            amp = self._match_text_seq("AMP")

            if not self._match(TokenType.INDEX):
                return None

            index = self._parse_id_var()
            table = None

        using = self._parse_field() if self._match(TokenType.USING) else None

        if self._match(TokenType.L_PAREN, advance=False):
            columns = self._parse_wrapped_csv(self._parse_ordered)
        else:
            columns = None

        return self.expression(
            exp.Index,
            this=index,
            table=table,
            using=using,
            columns=columns,
            unique=unique,
            primary=primary,
            amp=amp,
            partition_by=self._parse_partition_by(),
        )

    def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]:
        hints: t.List[exp.Expression] = []
        if self._match_pair(TokenType.WITH, TokenType.L_PAREN):
            # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16
            hints.append(
                self.expression(
                    exp.WithTableHint,
                    expressions=self._parse_csv(
                        lambda: self._parse_function() or self._parse_var(any_token=True)
                    ),
                )
            )
            self._match_r_paren()
        else:
            # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html
            while self._match_set(self.TABLE_INDEX_HINT_TOKENS):
                hint = exp.IndexTableHint(this=self._prev.text.upper())

                self._match_texts({"INDEX", "KEY"})
                if self._match(TokenType.FOR):
                    hint.set("target", self._advance_any() and self._prev.text.upper())

                hint.set("expressions", self._parse_wrapped_id_vars())
                hints.append(hint)

        return hints or None

    def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]:
        return (
            (not schema and self._parse_function(optional_parens=False))
            or self._parse_id_var(any_token=False)
            or self._parse_string_as_identifier()
            or self._parse_placeholder()
        )

    def _parse_table_parts(self, schema: bool = False) -> exp.Table:
        catalog = None
        db = None
        table = self._parse_table_part(schema=schema)

        while self._match(TokenType.DOT):
            if catalog:
                # This allows nesting the table in arbitrarily many dot expressions if needed
                table = self.expression(
                    exp.Dot, this=table, expression=self._parse_table_part(schema=schema)
                )
            else:
                catalog = db
                db = table
                table = self._parse_table_part(schema=schema)

        if not table:
            self.raise_error(f"Expected table name but got {self._curr}")

        return self.expression(
            exp.Table, this=table, db=db, catalog=catalog, pivots=self._parse_pivots()
        )
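    # Illustrative sketch (added commentary, not part of the original module):
    # dotted names fill table/db/catalog from right to left; anything deeper
    # folds into exp.Dot, per the comment in _parse_table_parts above.
    #
    #   import sqlglot
    #   table = sqlglot.parse_one("SELECT * FROM c.d.t").args["from"].this
    #   # table.args["catalog"] -> c, table.args["db"] -> d, table.this -> t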
    def _parse_table(
        self,
        schema: bool = False,
        joins: bool = False,
        alias_tokens: t.Optional[t.Collection[TokenType]] = None,
        parse_bracket: bool = False,
    ) -> t.Optional[exp.Expression]:
        lateral = self._parse_lateral()
        if lateral:
            return lateral

        unnest = self._parse_unnest()
        if unnest:
            return unnest

        values = self._parse_derived_table_values()
        if values:
            return values

        subquery = self._parse_select(table=True)
        if subquery:
            if not subquery.args.get("pivots"):
                subquery.set("pivots", self._parse_pivots())
            return subquery

        bracket = parse_bracket and self._parse_bracket(None)
        bracket = self.expression(exp.Table, this=bracket) if bracket else None
        this: exp.Expression = bracket or self._parse_table_parts(schema=schema)

        if schema:
            return self._parse_schema(this=this)

        if self.ALIAS_POST_TABLESAMPLE:
            table_sample = self._parse_table_sample()

        alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
        if alias:
            this.set("alias", alias)

        if not this.args.get("pivots"):
            this.set("pivots", self._parse_pivots())

        this.set("hints", self._parse_table_hints())

        if not self.ALIAS_POST_TABLESAMPLE:
            table_sample = self._parse_table_sample()

        if table_sample:
            table_sample.set("this", this)
            this = table_sample

        if joins:
            for join in iter(self._parse_join, None):
                this.append("joins", join)

        return this

    def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]:
        if not self._match(TokenType.UNNEST):
            return None

        expressions = self._parse_wrapped_csv(self._parse_type)
        ordinality = self._match_pair(TokenType.WITH, TokenType.ORDINALITY)

        alias = self._parse_table_alias() if with_alias else None

        if alias and self.UNNEST_COLUMN_ONLY:
            if alias.args.get("columns"):
                self.raise_error("Unexpected extra column alias in unnest.")

            alias.set("columns", [alias.this])
            alias.set("this", None)

        offset = None
        if self._match_pair(TokenType.WITH, TokenType.OFFSET):
            self._match(TokenType.ALIAS)
            offset = self._parse_id_var() or exp.to_identifier("offset")

        return self.expression(
            exp.Unnest, expressions=expressions, ordinality=ordinality, alias=alias, offset=offset
        )

    def _parse_derived_table_values(self) -> t.Optional[exp.Values]:
        is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES)
        if not is_derived and not self._match(TokenType.VALUES):
            return None

        expressions = self._parse_csv(self._parse_value)
        alias = self._parse_table_alias()

        if is_derived:
            self._match_r_paren()

        return self.expression(
            exp.Values, expressions=expressions, alias=alias or self._parse_table_alias()
        )

    def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]:
        if not self._match(TokenType.TABLE_SAMPLE) and not (
            as_modifier and self._match_text_seq("USING", "SAMPLE")
        ):
            return None

        bucket_numerator = None
        bucket_denominator = None
        bucket_field = None
        percent = None
        rows = None
        size = None
        seed = None

        kind = (
            self._prev.text if self._prev.token_type == TokenType.TABLE_SAMPLE else "USING SAMPLE"
        )
        method = self._parse_var(tokens=(TokenType.ROW,))

        self._match(TokenType.L_PAREN)

        num = self._parse_number()

        if self._match_text_seq("BUCKET"):
            bucket_numerator = self._parse_number()
            self._match_text_seq("OUT", "OF")
            bucket_denominator = self._parse_number()
            self._match(TokenType.ON)
            bucket_field = self._parse_field()
        elif self._match_set((TokenType.PERCENT, TokenType.MOD)):
            percent = num
        elif self._match(TokenType.ROWS):
            rows = num
        else:
            size = num

        self._match(TokenType.R_PAREN)

        if self._match(TokenType.L_PAREN):
            method = self._parse_var()
            seed = self._match(TokenType.COMMA) and self._parse_number()
            self._match_r_paren()
        elif self._match_texts(("SEED", "REPEATABLE")):
            seed = self._parse_wrapped(self._parse_number)

        return self.expression(
            exp.TableSample,
            method=method,
            bucket_numerator=bucket_numerator,
            bucket_denominator=bucket_denominator,
            bucket_field=bucket_field,
            percent=percent,
            rows=rows,
            size=size,
            seed=seed,
            kind=kind,
        )
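    # Illustrative sketch (added commentary, not part of the original module):
    # _parse_table_sample maps Hive-style bucket sampling onto the bucket_*
    # args, while percent/rows/size cover the other TABLESAMPLE shapes.
    #
    #   import sqlglot
    #   sql = "SELECT * FROM t TABLESAMPLE (BUCKET 1 OUT OF 4 ON x)"
    #   sqlglot.parse_one(sql, read="hive")
    #   # the table is wrapped in exp.TableSample(bucket_numerator=1,
    #   # bucket_denominator=4, bucket_field=x)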
    def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]:
        return list(iter(self._parse_pivot, None)) or None

    def _parse_joins(self) -> t.Optional[t.List[exp.Join]]:
        return list(iter(self._parse_join, None)) or None

    # https://duckdb.org/docs/sql/statements/pivot
    def _parse_simplified_pivot(self) -> exp.Pivot:
        def _parse_on() -> t.Optional[exp.Expression]:
            this = self._parse_bitwise()
            return self._parse_in(this) if self._match(TokenType.IN) else this

        this = self._parse_table()
        expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on)
        using = self._match(TokenType.USING) and self._parse_csv(
            lambda: self._parse_alias(self._parse_function())
        )
        group = self._parse_group()
        return self.expression(
            exp.Pivot, this=this, expressions=expressions, using=using, group=group
        )

    def _parse_pivot(self) -> t.Optional[exp.Pivot]:
        index = self._index

        if self._match(TokenType.PIVOT):
            unpivot = False
        elif self._match(TokenType.UNPIVOT):
            unpivot = True
        else:
            return None

        expressions = []
        field = None

        if not self._match(TokenType.L_PAREN):
            self._retreat(index)
            return None

        if unpivot:
            expressions = self._parse_csv(self._parse_column)
        else:
            expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function()))

        if not expressions:
            self.raise_error("Failed to parse PIVOT's aggregation list")

        if not self._match(TokenType.FOR):
            self.raise_error("Expecting FOR")

        value = self._parse_column()

        if not self._match(TokenType.IN):
            self.raise_error("Expecting IN")

        field = self._parse_in(value, alias=True)

        self._match_r_paren()

        pivot = self.expression(exp.Pivot, expressions=expressions, field=field, unpivot=unpivot)

        if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False):
            pivot.set("alias", self._parse_table_alias())

        if not unpivot:
            names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions))

            columns: t.List[exp.Expression] = []
            for fld in pivot.args["field"].expressions:
                field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name
                for name in names:
                    if self.PREFIXED_PIVOT_COLUMNS:
                        name = f"{name}_{field_name}" if name else field_name
                    else:
                        name = f"{field_name}_{name}" if name else field_name

                    columns.append(exp.to_identifier(name))

            pivot.set("columns", columns)

        return pivot

    def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]:
        return [agg.alias for agg in aggregations]

    def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]:
        if not skip_where_token and not self._match(TokenType.WHERE):
            return None

        return self.expression(
            exp.Where, comments=self._prev_comments, this=self._parse_conjunction()
        )

    def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]:
        if not skip_group_by_token and not self._match(TokenType.GROUP_BY):
            return None

        elements = defaultdict(list)

        if self._match(TokenType.ALL):
            return self.expression(exp.Group, all=True)

        while True:
            expressions = self._parse_csv(self._parse_conjunction)
            if expressions:
                elements["expressions"].extend(expressions)

            grouping_sets = self._parse_grouping_sets()
            if grouping_sets:
                elements["grouping_sets"].extend(grouping_sets)

            rollup = None
            cube = None
            totals = None

            with_ = self._match(TokenType.WITH)
            if self._match(TokenType.ROLLUP):
                rollup = with_ or self._parse_wrapped_csv(self._parse_column)
                elements["rollup"].extend(ensure_list(rollup))

            if self._match(TokenType.CUBE):
                cube = with_ or self._parse_wrapped_csv(self._parse_column)
                elements["cube"].extend(ensure_list(cube))

            if self._match_text_seq("TOTALS"):
                totals = True
                elements["totals"] = True  # type: ignore

            if not (grouping_sets or rollup or cube or totals):
                break

        return self.expression(exp.Group, **elements)  # type: ignore
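    # Illustrative sketch (added commentary, not part of the original module):
    # for a PIVOT such as SUM(x) AS s ... FOR c IN ('a', 'b'), _parse_pivot
    # derives output column names by combining each IN value with each
    # aggregation alias; PREFIXED_PIVOT_COLUMNS flips the order of the two
    # parts ("s_a" vs "a_s") and IDENTIFY_PIVOT_STRINGS controls whether the
    # quoted IN value or its bare name is used. Indicatively:
    #
    #   import sqlglot
    #   sqlglot.parse_one(
    #       "SELECT * FROM t PIVOT (SUM(x) AS s FOR c IN ('a', 'b'))", read="snowflake"
    #   )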
    def _parse_grouping_sets(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]:
        if not self._match(TokenType.GROUPING_SETS):
            return None

        return self._parse_wrapped_csv(self._parse_grouping_set)

    def _parse_grouping_set(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.L_PAREN):
            grouping_set = self._parse_csv(self._parse_column)
            self._match_r_paren()
            return self.expression(exp.Tuple, expressions=grouping_set)

        return self._parse_column()

    def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]:
        if not skip_having_token and not self._match(TokenType.HAVING):
            return None
        return self.expression(exp.Having, this=self._parse_conjunction())

    def _parse_qualify(self) -> t.Optional[exp.Qualify]:
        if not self._match(TokenType.QUALIFY):
            return None
        return self.expression(exp.Qualify, this=self._parse_conjunction())

    def _parse_order(
        self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False
    ) -> t.Optional[exp.Expression]:
        if not skip_order_token and not self._match(TokenType.ORDER_BY):
            return this

        return self.expression(
            exp.Order, this=this, expressions=self._parse_csv(self._parse_ordered)
        )

    def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]:
        if not self._match(token):
            return None
        return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered))

    def _parse_ordered(self) -> exp.Ordered:
        this = self._parse_conjunction()
        self._match(TokenType.ASC)

        is_desc = self._match(TokenType.DESC)
        is_nulls_first = self._match_text_seq("NULLS", "FIRST")
        is_nulls_last = self._match_text_seq("NULLS", "LAST")
        desc = is_desc or False
        asc = not desc
        nulls_first = is_nulls_first or False
        explicitly_null_ordered = is_nulls_first or is_nulls_last

        if (
            not explicitly_null_ordered
            and (
                (asc and self.NULL_ORDERING == "nulls_are_small")
                or (desc and self.NULL_ORDERING != "nulls_are_small")
            )
            and self.NULL_ORDERING != "nulls_are_last"
        ):
            nulls_first = True

        return self.expression(exp.Ordered, this=this, desc=desc, nulls_first=nulls_first)
    def _parse_limit(
        self, this: t.Optional[exp.Expression] = None, top: bool = False
    ) -> t.Optional[exp.Expression]:
        if self._match(TokenType.TOP if top else TokenType.LIMIT):
            comments = self._prev_comments
            if top:
                limit_paren = self._match(TokenType.L_PAREN)
                expression = self._parse_number()

                if limit_paren:
                    self._match_r_paren()
            else:
                expression = self._parse_term()

            if self._match(TokenType.COMMA):
                offset = expression
                expression = self._parse_term()
            else:
                offset = None

            limit_exp = self.expression(
                exp.Limit, this=this, expression=expression, offset=offset, comments=comments
            )

            return limit_exp

        if self._match(TokenType.FETCH):
            direction = self._match_set((TokenType.FIRST, TokenType.NEXT))
            direction = self._prev.text if direction else "FIRST"

            count = self._parse_number()
            percent = self._match(TokenType.PERCENT)

            self._match_set((TokenType.ROW, TokenType.ROWS))

            only = self._match_text_seq("ONLY")
            with_ties = self._match_text_seq("WITH", "TIES")

            if only and with_ties:
                self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause")

            return self.expression(
                exp.Fetch,
                direction=direction,
                count=count,
                percent=percent,
                with_ties=with_ties,
            )

        return this

    def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        if not self._match(TokenType.OFFSET):
            return this

        count = self._parse_term()
        self._match_set((TokenType.ROW, TokenType.ROWS))
        return self.expression(exp.Offset, this=this, expression=count)

    def _parse_locks(self) -> t.List[exp.Lock]:
        locks = []
        while True:
            if self._match_text_seq("FOR", "UPDATE"):
                update = True
            elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq(
                "LOCK", "IN", "SHARE", "MODE"
            ):
                update = False
            else:
                break

            expressions = None
            if self._match_text_seq("OF"):
                expressions = self._parse_csv(lambda: self._parse_table(schema=True))

            wait: t.Optional[bool | exp.Expression] = None
            if self._match_text_seq("NOWAIT"):
                wait = True
            elif self._match_text_seq("WAIT"):
                wait = self._parse_primary()
            elif self._match_text_seq("SKIP", "LOCKED"):
                wait = False

            locks.append(
                self.expression(exp.Lock, update=update, expressions=expressions, wait=wait)
            )

        return locks

    def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if not self._match_set(self.SET_OPERATIONS):
            return this

        token_type = self._prev.token_type

        if token_type == TokenType.UNION:
            expression = exp.Union
        elif token_type == TokenType.EXCEPT:
            expression = exp.Except
        else:
            expression = exp.Intersect

        return self.expression(
            expression,
            this=this,
            distinct=self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL),
            expression=self._parse_set_operations(self._parse_select(nested=True)),
        )
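    # Illustrative sketch (added commentary, not part of the original module):
    # set operations nest to the right via the recursive call above, and
    # DISTINCT is assumed unless ALL is given. Indicatively:
    #
    #   import sqlglot
    #   u = sqlglot.parse_one("SELECT 1 UNION ALL SELECT 2")
    #   # isinstance(u, sqlglot.exp.Union) and u.args["distinct"] is False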
    def _parse_expression(self) -> t.Optional[exp.Expression]:
        return self._parse_alias(self._parse_conjunction())

    def _parse_conjunction(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_equality, self.CONJUNCTION)

    def _parse_equality(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_comparison, self.EQUALITY)

    def _parse_comparison(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_range, self.COMPARISON)

    def _parse_range(self) -> t.Optional[exp.Expression]:
        this = self._parse_bitwise()
        negate = self._match(TokenType.NOT)

        if self._match_set(self.RANGE_PARSERS):
            expression = self.RANGE_PARSERS[self._prev.token_type](self, this)
            if not expression:
                return this

            this = expression
        elif self._match(TokenType.ISNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())

        # Postgres supports ISNULL and NOTNULL for conditions.
        # https://blog.andreiavram.ro/postgresql-null-composite-type/
        if self._match(TokenType.NOTNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())
            this = self.expression(exp.Not, this=this)

        if negate:
            this = self.expression(exp.Not, this=this)

        if self._match(TokenType.IS):
            this = self._parse_is(this)

        return this

    def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        index = self._index - 1
        negate = self._match(TokenType.NOT)

        if self._match_text_seq("DISTINCT", "FROM"):
            klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ
            return self.expression(klass, this=this, expression=self._parse_expression())

        expression = self._parse_null() or self._parse_boolean()
        if not expression:
            self._retreat(index)
            return None

        this = self.expression(exp.Is, this=this, expression=expression)
        return self.expression(exp.Not, this=this) if negate else this

    def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In:
        unnest = self._parse_unnest(with_alias=False)
        if unnest:
            this = self.expression(exp.In, this=this, unnest=unnest)
        elif self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias))

            if len(expressions) == 1 and isinstance(expressions[0], exp.Subqueryable):
                this = self.expression(exp.In, this=this, query=expressions[0])
            else:
                this = self.expression(exp.In, this=this, expressions=expressions)

            self._match_r_paren(this)
        else:
            this = self.expression(exp.In, this=this, field=self._parse_field())

        return this

    def _parse_between(self, this: exp.Expression) -> exp.Between:
        low = self._parse_bitwise()
        self._match(TokenType.AND)
        high = self._parse_bitwise()
        return self.expression(exp.Between, this=this, low=low, high=high)

    def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if not self._match(TokenType.ESCAPE):
            return this
        return self.expression(exp.Escape, this=this, expression=self._parse_string())

    def _parse_interval(self) -> t.Optional[exp.Interval]:
        if not self._match(TokenType.INTERVAL):
            return None

        if self._match(TokenType.STRING, advance=False):
            this = self._parse_primary()
        else:
            this = self._parse_term()

        unit = self._parse_function() or self._parse_var()

        # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse
        # each INTERVAL expression into this canonical form so it's easy to transpile
        if this and this.is_number:
            this = exp.Literal.string(this.name)
        elif this and this.is_string:
            parts = this.name.split()

            if len(parts) == 2:
                if unit:
                    # this is not actually a unit, it's something else
                    unit = None
                    self._retreat(self._index - 1)
                else:
                    this = exp.Literal.string(parts[0])
                    unit = self.expression(exp.Var, this=parts[1])

        return self.expression(exp.Interval, this=this, unit=unit)
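    # Illustrative sketch (added commentary, not part of the original module):
    # per the canonicalization above, both spellings should normalize to
    # INTERVAL '<n>' <unit>:
    #
    #   import sqlglot
    #   sqlglot.parse_one("SELECT INTERVAL 5 DAY").sql()    # "SELECT INTERVAL '5' DAY"
    #   sqlglot.parse_one("SELECT INTERVAL '5 DAY'").sql()  # "SELECT INTERVAL '5' DAY"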
    def _parse_bitwise(self) -> t.Optional[exp.Expression]:
        this = self._parse_term()

        while True:
            if self._match_set(self.BITWISE):
                this = self.expression(
                    self.BITWISE[self._prev.token_type], this=this, expression=self._parse_term()
                )
            elif self._match_pair(TokenType.LT, TokenType.LT):
                this = self.expression(
                    exp.BitwiseLeftShift, this=this, expression=self._parse_term()
                )
            elif self._match_pair(TokenType.GT, TokenType.GT):
                this = self.expression(
                    exp.BitwiseRightShift, this=this, expression=self._parse_term()
                )
            else:
                break

        return this

    def _parse_term(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_factor, self.TERM)

    def _parse_factor(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_unary, self.FACTOR)

    def _parse_unary(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.UNARY_PARSERS):
            return self.UNARY_PARSERS[self._prev.token_type](self)
        return self._parse_at_time_zone(self._parse_type())

    def _parse_type(self) -> t.Optional[exp.Expression]:
        interval = self._parse_interval()
        if interval:
            return interval

        index = self._index
        data_type = self._parse_types(check_func=True)
        this = self._parse_column()

        if data_type:
            if isinstance(this, exp.Literal):
                parser = self.TYPE_LITERAL_PARSERS.get(data_type.this)
                if parser:
                    return parser(self, this, data_type)
                return self.expression(exp.Cast, this=this, to=data_type)
            if not data_type.expressions:
                self._retreat(index)
                return self._parse_column()
            return self._parse_column_ops(data_type)

        return this

    def _parse_type_size(self) -> t.Optional[exp.DataTypeSize]:
        this = self._parse_type()
        if not this:
            return None

        return self.expression(
            exp.DataTypeSize, this=this, expression=self._parse_var(any_token=True)
        )
    def _parse_types(
        self, check_func: bool = False, schema: bool = False
    ) -> t.Optional[exp.Expression]:
        index = self._index

        prefix = self._match_text_seq("SYSUDTLIB", ".")

        if not self._match_set(self.TYPE_TOKENS):
            return None

        type_token = self._prev.token_type

        if type_token == TokenType.PSEUDO_TYPE:
            return self.expression(exp.PseudoType, this=self._prev.text)

        nested = type_token in self.NESTED_TYPE_TOKENS
        is_struct = type_token == TokenType.STRUCT
        expressions = None
        maybe_func = False

        if self._match(TokenType.L_PAREN):
            if is_struct:
                expressions = self._parse_csv(self._parse_struct_types)
            elif nested:
                expressions = self._parse_csv(
                    lambda: self._parse_types(check_func=check_func, schema=schema)
                )
            elif type_token in self.ENUM_TYPE_TOKENS:
                expressions = self._parse_csv(self._parse_primary)
            else:
                expressions = self._parse_csv(self._parse_type_size)

            if not expressions or not self._match(TokenType.R_PAREN):
                self._retreat(index)
                return None

            maybe_func = True

        if self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET):
            this = exp.DataType(
                this=exp.DataType.Type.ARRAY,
                expressions=[
                    exp.DataType(
                        this=exp.DataType.Type[type_token.value],
                        expressions=expressions,
                        nested=nested,
                    )
                ],
                nested=True,
            )

            while self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET):
                this = exp.DataType(this=exp.DataType.Type.ARRAY, expressions=[this], nested=True)

            return this

        if self._match(TokenType.L_BRACKET):
            self._retreat(index)
            return None

        values: t.Optional[t.List[t.Optional[exp.Expression]]] = None
        if nested and self._match(TokenType.LT):
            if is_struct:
                expressions = self._parse_csv(self._parse_struct_types)
            else:
                expressions = self._parse_csv(
                    lambda: self._parse_types(check_func=check_func, schema=schema)
                )

            if not self._match(TokenType.GT):
                self.raise_error("Expecting >")

            if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)):
                values = self._parse_csv(self._parse_conjunction)
                self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN))

        value: t.Optional[exp.Expression] = None
        if type_token in self.TIMESTAMPS:
            if self._match_text_seq("WITH", "TIME", "ZONE"):
                maybe_func = False
                value = exp.DataType(this=exp.DataType.Type.TIMESTAMPTZ, expressions=expressions)
            elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"):
                maybe_func = False
                value = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions)
            elif self._match_text_seq("WITHOUT", "TIME", "ZONE"):
                maybe_func = False
        elif type_token == TokenType.INTERVAL:
            unit = self._parse_var()

            if not unit:
                value = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL)
            else:
                value = self.expression(exp.Interval, unit=unit)

        if maybe_func and check_func:
            index2 = self._index
            peek = self._parse_string()

            if not peek:
                self._retreat(index)
                return None

            self._retreat(index2)

        if value:
            return value

        return exp.DataType(
            this=exp.DataType.Type[type_token.value],
            expressions=expressions,
            nested=nested,
            values=values,
            prefix=prefix,
        )
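    # Illustrative sketch (added commentary, not part of the original module):
    # _parse_types accepts both ARRAY<INT>-style nesting and trailing []s,
    # producing nested exp.DataType trees either way. Indicatively:
    #
    #   import sqlglot
    #   sqlglot.exp.DataType.build("array<int>").sql()  # roughly 'ARRAY<INT>'
    #   # "INT[]" likewise parses to DataType(this=ARRAY, expressions=[INT], nested=True)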
    def _parse_struct_types(self) -> t.Optional[exp.Expression]:
        this = self._parse_type() or self._parse_id_var()
        self._match(TokenType.COLON)
        return self._parse_column_def(this)

    def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if not self._match_text_seq("AT", "TIME", "ZONE"):
            return this
        return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary())

    def _parse_column(self) -> t.Optional[exp.Expression]:
        this = self._parse_field()
        if isinstance(this, exp.Identifier):
            this = self.expression(exp.Column, this=this)
        elif not this:
            return self._parse_bracket(this)
        return self._parse_column_ops(this)

    def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        this = self._parse_bracket(this)

        while self._match_set(self.COLUMN_OPERATORS):
            op_token = self._prev.token_type
            op = self.COLUMN_OPERATORS.get(op_token)

            if op_token == TokenType.DCOLON:
                field = self._parse_types()
                if not field:
                    self.raise_error("Expected type")
            elif op and self._curr:
                self._advance()
                value = self._prev.text
                field = (
                    exp.Literal.number(value)
                    if self._prev.token_type == TokenType.NUMBER
                    else exp.Literal.string(value)
                )
            else:
                field = self._parse_field(anonymous_func=True, any_token=True)

            if isinstance(field, exp.Func):
                # bigquery allows function calls like x.y.count(...), SAFE.SUBSTR(...)
                # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules
                this = self._replace_columns_with_dots(this)

            if op:
                this = op(self, this, field)
            elif isinstance(this, exp.Column) and not this.args.get("catalog"):
                this = self.expression(
                    exp.Column,
                    this=field,
                    table=this.this,
                    db=this.args.get("table"),
                    catalog=this.args.get("db"),
                )
            else:
                this = self.expression(exp.Dot, this=this, expression=field)

            this = self._parse_bracket(this)

        return this

    def _parse_primary(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.PRIMARY_PARSERS):
            token_type = self._prev.token_type
            primary = self.PRIMARY_PARSERS[token_type](self, self._prev)

            if token_type == TokenType.STRING:
                expressions = [primary]
                while self._match(TokenType.STRING):
                    expressions.append(exp.Literal.string(self._prev.text))

                if len(expressions) > 1:
                    return self.expression(exp.Concat, expressions=expressions)

            return primary

        if self._match_pair(TokenType.DOT, TokenType.NUMBER):
            return exp.Literal.number(f"0.{self._prev.text}")

        if self._match(TokenType.L_PAREN):
            comments = self._prev_comments
            query = self._parse_select()

            if query:
                expressions = [query]
            else:
                expressions = self._parse_expressions()

            this = self._parse_query_modifiers(seq_get(expressions, 0))

            if isinstance(this, exp.Subqueryable):
                this = self._parse_set_operations(
                    self._parse_subquery(this=this, parse_alias=False)
                )
            elif len(expressions) > 1:
                this = self.expression(exp.Tuple, expressions=expressions)
            else:
                this = self.expression(exp.Paren, this=self._parse_set_operations(this))

            if this:
                this.add_comments(comments)

            self._match_r_paren(expression=this)
            return this

        return None
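    # Illustrative sketch (added commentary, not part of the original module):
    # _parse_primary folds adjacent string literals into one exp.Concat:
    #
    #   import sqlglot
    #   sqlglot.parse_one("SELECT 'a' 'b'").sql()  # roughly "SELECT CONCAT('a', 'b')"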
    def _parse_field(
        self,
        any_token: bool = False,
        tokens: t.Optional[t.Collection[TokenType]] = None,
        anonymous_func: bool = False,
    ) -> t.Optional[exp.Expression]:
        return (
            self._parse_primary()
            or self._parse_function(anonymous=anonymous_func)
            or self._parse_id_var(any_token=any_token, tokens=tokens)
        )

    def _parse_function(
        self,
        functions: t.Optional[t.Dict[str, t.Callable]] = None,
        anonymous: bool = False,
        optional_parens: bool = True,
    ) -> t.Optional[exp.Expression]:
        if not self._curr:
            return None

        token_type = self._curr.token_type

        if optional_parens and self._match_set(self.NO_PAREN_FUNCTION_PARSERS):
            return self.NO_PAREN_FUNCTION_PARSERS[token_type](self)

        if not self._next or self._next.token_type != TokenType.L_PAREN:
            if optional_parens and token_type in self.NO_PAREN_FUNCTIONS:
                self._advance()
                return self.expression(self.NO_PAREN_FUNCTIONS[token_type])

            return None

        if token_type not in self.FUNC_TOKENS:
            return None

        this = self._curr.text
        upper = this.upper()
        self._advance(2)

        parser = self.FUNCTION_PARSERS.get(upper)

        if parser and not anonymous:
            this = parser(self)
        else:
            subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type)

            if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH):
                this = self.expression(subquery_predicate, this=self._parse_select())
                self._match_r_paren()
                return this

            if functions is None:
                functions = self.FUNCTIONS

            function = functions.get(upper)

            alias = upper in self.FUNCTIONS_WITH_ALIASED_ARGS
            args = self._parse_csv(lambda: self._parse_lambda(alias=alias))

            if function and not anonymous:
                this = self.validate_expression(function(args), args)
            else:
                this = self.expression(exp.Anonymous, this=this, expressions=args)

        self._match(TokenType.R_PAREN, expression=this)
        return self._parse_window(this)

    def _parse_function_parameter(self) -> t.Optional[exp.Expression]:
        return self._parse_column_def(self._parse_id_var())

    def _parse_user_defined_function(
        self, kind: t.Optional[TokenType] = None
    ) -> t.Optional[exp.Expression]:
        this = self._parse_id_var()

        while self._match(TokenType.DOT):
            this = self.expression(exp.Dot, this=this, expression=self._parse_id_var())

        if not self._match(TokenType.L_PAREN):
            return this

        expressions = self._parse_csv(self._parse_function_parameter)
        self._match_r_paren()
        return self.expression(
            exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True
        )

    def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier:
        literal = self._parse_primary()
        if literal:
            return self.expression(exp.Introducer, this=token.text, expression=literal)

        return self.expression(exp.Identifier, this=token.text)

    def _parse_session_parameter(self) -> exp.SessionParameter:
        kind = None
        this = self._parse_id_var() or self._parse_primary()

        if this and self._match(TokenType.DOT):
            kind = this.name
            this = self._parse_var() or self._parse_primary()

        return self.expression(exp.SessionParameter, this=this, kind=kind)

    def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]:
        index = self._index

        if self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(self._parse_id_var)

            if not self._match(TokenType.R_PAREN):
                self._retreat(index)
        else:
            expressions = [self._parse_id_var()]

        if self._match_set(self.LAMBDAS):
            return self.LAMBDAS[self._prev.token_type](self, expressions)

        self._retreat(index)

        this: t.Optional[exp.Expression]

        if self._match(TokenType.DISTINCT):
            this = self.expression(
                exp.Distinct, expressions=self._parse_csv(self._parse_conjunction)
            )
        else:
            this = self._parse_select_or_expression(alias=alias)

        return self._parse_limit(self._parse_order(self._parse_respect_or_ignore_nulls(this)))

    def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        index = self._index

        if not self.errors:
            try:
                if self._parse_select(nested=True):
                    return this
            except ParseError:
                pass
            finally:
                self.errors.clear()
                self._retreat(index)

        if not self._match(TokenType.L_PAREN):
            return this

        args = self._parse_csv(
            lambda: self._parse_constraint()
            or self._parse_column_def(self._parse_field(any_token=True))
        )

        self._match_r_paren()
        return self.expression(exp.Schema, this=this, expressions=args)
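    # Illustrative sketch (added commentary, not part of the original module):
    # _parse_lambda is what lets function arguments be lambdas; F below is a
    # hypothetical (anonymous) function used only for illustration.
    #
    #   import sqlglot
    #   call = sqlglot.parse_one("SELECT F(xs, x -> x + 1)").selects[0]
    #   # the second argument parses to exp.Lambda(this=x + 1, expressions=[x])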
    def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        # column defs are not really columns, they're identifiers
        if isinstance(this, exp.Column):
            this = this.this

        kind = self._parse_types(schema=True)

        if self._match_text_seq("FOR", "ORDINALITY"):
            return self.expression(exp.ColumnDef, this=this, ordinality=True)

        constraints = []
        while True:
            constraint = self._parse_column_constraint()
            if not constraint:
                break
            constraints.append(constraint)

        if not kind and not constraints:
            return this

        return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints)

    def _parse_auto_increment(
        self,
    ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint:
        start = None
        increment = None

        if self._match(TokenType.L_PAREN, advance=False):
            args = self._parse_wrapped_csv(self._parse_bitwise)
            start = seq_get(args, 0)
            increment = seq_get(args, 1)
        elif self._match_text_seq("START"):
            start = self._parse_bitwise()
            self._match_text_seq("INCREMENT")
            increment = self._parse_bitwise()

        if start and increment:
            return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment)

        return exp.AutoIncrementColumnConstraint()

    def _parse_compress(self) -> exp.CompressColumnConstraint:
        if self._match(TokenType.L_PAREN, advance=False):
            return self.expression(
                exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise)
            )

        return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise())

    def _parse_generated_as_identity(self) -> exp.GeneratedAsIdentityColumnConstraint:
        if self._match_text_seq("BY", "DEFAULT"):
            on_null = self._match_pair(TokenType.ON, TokenType.NULL)
            this = self.expression(
                exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null
            )
        else:
            self._match_text_seq("ALWAYS")
            this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True)

        self._match(TokenType.ALIAS)
        identity = self._match_text_seq("IDENTITY")

        if self._match(TokenType.L_PAREN):
            if self._match_text_seq("START", "WITH"):
                this.set("start", self._parse_bitwise())
            if self._match_text_seq("INCREMENT", "BY"):
                this.set("increment", self._parse_bitwise())
            if self._match_text_seq("MINVALUE"):
                this.set("minvalue", self._parse_bitwise())
            if self._match_text_seq("MAXVALUE"):
                this.set("maxvalue", self._parse_bitwise())

            if self._match_text_seq("CYCLE"):
                this.set("cycle", True)
            elif self._match_text_seq("NO", "CYCLE"):
                this.set("cycle", False)

            if not identity:
                this.set("expression", self._parse_bitwise())

            self._match_r_paren()

        return this

    def _parse_inline(self) -> exp.InlineLengthColumnConstraint:
        self._match_text_seq("LENGTH")
        return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise())

    def _parse_not_constraint(
        self,
    ) -> t.Optional[exp.NotNullColumnConstraint | exp.CaseSpecificColumnConstraint]:
        if self._match_text_seq("NULL"):
            return self.expression(exp.NotNullColumnConstraint)
        if self._match_text_seq("CASESPECIFIC"):
            return self.expression(exp.CaseSpecificColumnConstraint, not_=True)
        return None
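    # Illustrative sketch (added commentary, not part of the original module):
    # identity columns parse into exp.GeneratedAsIdentityColumnConstraint,
    # with this=True for ALWAYS and this=False for BY DEFAULT. Indicatively:
    #
    #   import sqlglot
    #   sqlglot.parse_one(
    #       "CREATE TABLE t (id INT GENERATED ALWAYS AS IDENTITY (START WITH 1 INCREMENT BY 2))"
    #   )
    #   # the constraint carries this=True, start=1, increment=2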
    def _parse_column_constraint(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.CONSTRAINT):
            this = self._parse_id_var()
        else:
            this = None

        if self._match_texts(self.CONSTRAINT_PARSERS):
            return self.expression(
                exp.ColumnConstraint,
                this=this,
                kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self),
            )

        return this

    def _parse_constraint(self) -> t.Optional[exp.Expression]:
        if not self._match(TokenType.CONSTRAINT):
            return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS)

        this = self._parse_id_var()
        expressions = []

        while True:
            constraint = self._parse_unnamed_constraint() or self._parse_function()
            if not constraint:
                break
            expressions.append(constraint)

        return self.expression(exp.Constraint, this=this, expressions=expressions)

    def _parse_unnamed_constraint(
        self, constraints: t.Optional[t.Collection[str]] = None
    ) -> t.Optional[exp.Expression]:
        if not self._match_texts(constraints or self.CONSTRAINT_PARSERS):
            return None

        constraint = self._prev.text.upper()
        if constraint not in self.CONSTRAINT_PARSERS:
            self.raise_error(f"No parser found for schema constraint {constraint}.")

        return self.CONSTRAINT_PARSERS[constraint](self)

    def _parse_unique(self) -> exp.UniqueColumnConstraint:
        self._match_text_seq("KEY")
        return self.expression(
            exp.UniqueColumnConstraint, this=self._parse_schema(self._parse_id_var(any_token=False))
        )

    def _parse_key_constraint_options(self) -> t.List[str]:
        options = []
        while True:
            if not self._curr:
                break

            if self._match(TokenType.ON):
                action = None
                on = self._advance_any() and self._prev.text

                if self._match_text_seq("NO", "ACTION"):
                    action = "NO ACTION"
                elif self._match_text_seq("CASCADE"):
                    action = "CASCADE"
                elif self._match_pair(TokenType.SET, TokenType.NULL):
                    action = "SET NULL"
                elif self._match_pair(TokenType.SET, TokenType.DEFAULT):
                    action = "SET DEFAULT"
                else:
                    self.raise_error("Invalid key constraint")

                options.append(f"ON {on} {action}")
            elif self._match_text_seq("NOT", "ENFORCED"):
                options.append("NOT ENFORCED")
            elif self._match_text_seq("DEFERRABLE"):
                options.append("DEFERRABLE")
            elif self._match_text_seq("INITIALLY", "DEFERRED"):
                options.append("INITIALLY DEFERRED")
            elif self._match_text_seq("NORELY"):
                options.append("NORELY")
            elif self._match_text_seq("MATCH", "FULL"):
                options.append("MATCH FULL")
            else:
                break

        return options

    def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]:
        if match and not self._match(TokenType.REFERENCES):
            return None

        expressions = None
        this = self._parse_table(schema=True)
        options = self._parse_key_constraint_options()
        return self.expression(exp.Reference, this=this, expressions=expressions, options=options)
    def _parse_foreign_key(self) -> exp.ForeignKey:
        expressions = self._parse_wrapped_id_vars()
        reference = self._parse_references()
        options = {}

        while self._match(TokenType.ON):
            if not self._match_set((TokenType.DELETE, TokenType.UPDATE)):
                self.raise_error("Expected DELETE or UPDATE")

            kind = self._prev.text.lower()

            if self._match_text_seq("NO", "ACTION"):
                action = "NO ACTION"
            elif self._match(TokenType.SET):
                self._match_set((TokenType.NULL, TokenType.DEFAULT))
                action = "SET " + self._prev.text.upper()
            else:
                self._advance()
                action = self._prev.text.upper()

            options[kind] = action

        return self.expression(
            exp.ForeignKey, expressions=expressions, reference=reference, **options  # type: ignore
        )

    def _parse_primary_key(
        self, wrapped_optional: bool = False, in_props: bool = False
    ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey:
        desc = (
            self._match_set((TokenType.ASC, TokenType.DESC))
            and self._prev.token_type == TokenType.DESC
        )

        if not in_props and not self._match(TokenType.L_PAREN, advance=False):
            return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc)

        expressions = self._parse_wrapped_csv(self._parse_field, optional=wrapped_optional)
        options = self._parse_key_constraint_options()
        return self.expression(exp.PrimaryKey, expressions=expressions, options=options)

    def _parse_bracket(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)):
            return this

        bracket_kind = self._prev.token_type

        if self._match(TokenType.COLON):
            expressions: t.List[t.Optional[exp.Expression]] = [
                self.expression(exp.Slice, expression=self._parse_conjunction())
            ]
        else:
            expressions = self._parse_csv(lambda: self._parse_slice(self._parse_conjunction()))

        # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs
        if bracket_kind == TokenType.L_BRACE:
            this = self.expression(exp.Struct, expressions=expressions)
        elif not this or this.name.upper() == "ARRAY":
            this = self.expression(exp.Array, expressions=expressions)
        else:
            expressions = apply_index_offset(this, expressions, -self.INDEX_OFFSET)
            this = self.expression(exp.Bracket, this=this, expressions=expressions)

        if not self._match(TokenType.R_BRACKET) and bracket_kind == TokenType.L_BRACKET:
            self.raise_error("Expected ]")
        elif not self._match(TokenType.R_BRACE) and bracket_kind == TokenType.L_BRACE:
            self.raise_error("Expected }")

        self._add_comments(this)
        return self._parse_bracket(this)

    def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if self._match(TokenType.COLON):
            return self.expression(exp.Slice, this=this, expression=self._parse_conjunction())
        return this

    def _parse_case(self) -> t.Optional[exp.Expression]:
        ifs = []
        default = None

        expression = self._parse_conjunction()

        while self._match(TokenType.WHEN):
            this = self._parse_conjunction()
            self._match(TokenType.THEN)
            then = self._parse_conjunction()
            ifs.append(self.expression(exp.If, this=this, true=then))

        if self._match(TokenType.ELSE):
            default = self._parse_conjunction()

        if not self._match(TokenType.END):
            self.raise_error("Expected END after CASE", self._prev)

        return self._parse_window(
            self.expression(exp.Case, this=expression, ifs=ifs, default=default)
        )
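    # Illustrative sketch (added commentary, not part of the original module):
    # _parse_bracket distinguishes three shapes: "{...}" becomes exp.Struct
    # (DuckDB struct literals), "ARRAY[...]" becomes exp.Array, and "x[...]"
    # becomes exp.Bracket with INDEX_OFFSET applied to the indices.
    #
    #   import sqlglot
    #   sqlglot.parse_one("SELECT {'a': 1}", read="duckdb")  # struct literal
    #   sqlglot.parse_one("SELECT x[1] FROM t")              # exp.Bracket access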
    def _parse_if(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.L_PAREN):
            args = self._parse_csv(self._parse_conjunction)
            this = self.validate_expression(exp.If.from_arg_list(args), args)
            self._match_r_paren()
        else:
            index = self._index - 1
            condition = self._parse_conjunction()

            if not condition:
                self._retreat(index)
                return None

            self._match(TokenType.THEN)
            true = self._parse_conjunction()
            false = self._parse_conjunction() if self._match(TokenType.ELSE) else None
            self._match(TokenType.END)
            this = self.expression(exp.If, this=condition, true=true, false=false)

        return self._parse_window(this)

    def _parse_extract(self) -> exp.Extract:
        this = self._parse_function() or self._parse_var() or self._parse_type()

        if self._match(TokenType.FROM):
            return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

        if not self._match(TokenType.COMMA):
            self.raise_error("Expected FROM or comma after EXTRACT", self._prev)

        return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

    def _parse_any_value(self) -> exp.AnyValue:
        this = self._parse_lambda()
        is_max = None
        having = None

        if self._match(TokenType.HAVING):
            self._match_texts(("MAX", "MIN"))
            is_max = self._prev.text == "MAX"
            having = self._parse_column()

        return self.expression(exp.AnyValue, this=this, having=having, max=is_max)

    def _parse_cast(self, strict: bool) -> exp.Expression:
        this = self._parse_conjunction()

        if not self._match(TokenType.ALIAS):
            if self._match(TokenType.COMMA):
                return self.expression(
                    exp.CastToStrType, this=this, expression=self._parse_string()
                )
            else:
                self.raise_error("Expected AS after CAST")

        fmt = None
        to = self._parse_types()

        if not to:
            self.raise_error("Expected TYPE after CAST")
        elif to.this == exp.DataType.Type.CHAR:
            if self._match(TokenType.CHARACTER_SET):
                to = self.expression(exp.CharacterSet, this=self._parse_var_or_string())
        elif self._match(TokenType.FORMAT):
            fmt_string = self._parse_string()
            fmt = self._parse_at_time_zone(fmt_string)

            if to.this in exp.DataType.TEMPORAL_TYPES:
                this = self.expression(
                    exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime,
                    this=this,
                    format=exp.Literal.string(
                        format_time(
                            fmt_string.this if fmt_string else "",
                            self.FORMAT_MAPPING or self.TIME_MAPPING,
                            self.FORMAT_TRIE or self.TIME_TRIE,
                        )
                    ),
                )

                if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime):
                    this.set("zone", fmt.args["zone"])

                return this

        return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, format=fmt)
    def _parse_concat(self) -> t.Optional[exp.Expression]:
        args = self._parse_csv(self._parse_conjunction)
        if self.CONCAT_NULL_OUTPUTS_STRING:
            args = [
                exp.func("COALESCE", exp.cast(arg, "text"), exp.Literal.string(""))
                for arg in args
                if arg
            ]

        # Some dialects (e.g. Trino) don't allow a single-argument CONCAT call, so when
        # we find such a call we replace it with its argument.
        if len(args) == 1:
            return args[0]

        return self.expression(
            exp.Concat if self.STRICT_STRING_CONCAT else exp.SafeConcat, expressions=args
        )

    def _parse_string_agg(self) -> exp.Expression:
        if self._match(TokenType.DISTINCT):
            args: t.List[t.Optional[exp.Expression]] = [
                self.expression(exp.Distinct, expressions=[self._parse_conjunction()])
            ]
            if self._match(TokenType.COMMA):
                args.extend(self._parse_csv(self._parse_conjunction))
        else:
            args = self._parse_csv(self._parse_conjunction)

        index = self._index
        if not self._match(TokenType.R_PAREN):
            # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]])
            return self.expression(
                exp.GroupConcat,
                this=seq_get(args, 0),
                separator=self._parse_order(this=seq_get(args, 1)),
            )

        # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]).
        # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that
        # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them.
        if not self._match_text_seq("WITHIN", "GROUP"):
            self._retreat(index)
            return self.validate_expression(exp.GroupConcat.from_arg_list(args), args)

        self._match_l_paren()  # The corresponding match_r_paren will be called in parse_function (caller)
        order = self._parse_order(this=seq_get(args, 0))
        return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1))

    def _parse_convert(self, strict: bool) -> t.Optional[exp.Expression]:
        this = self._parse_bitwise()

        if self._match(TokenType.USING):
            to: t.Optional[exp.Expression] = self.expression(
                exp.CharacterSet, this=self._parse_var()
            )
        elif self._match(TokenType.COMMA):
            to = self._parse_types()
        else:
            to = None

        return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to)
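    # Illustrative sketch (added commentary, not part of the original module):
    # per the comment in _parse_concat above, a single-argument CONCAT call
    # unwraps to its argument:
    #
    #   import sqlglot
    #   sqlglot.parse_one("SELECT CONCAT(x)").sql()  # roughly 'SELECT x'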
3901 """ 3902 args = self._parse_csv(self._parse_conjunction) 3903 3904 if len(args) < 3: 3905 return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1)) 3906 3907 expression, *expressions = args 3908 if not expression: 3909 return None 3910 3911 ifs = [] 3912 for search, result in zip(expressions[::2], expressions[1::2]): 3913 if not search or not result: 3914 return None 3915 3916 if isinstance(search, exp.Literal): 3917 ifs.append( 3918 exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result) 3919 ) 3920 elif isinstance(search, exp.Null): 3921 ifs.append( 3922 exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result) 3923 ) 3924 else: 3925 cond = exp.or_( 3926 exp.EQ(this=expression.copy(), expression=search), 3927 exp.and_( 3928 exp.Is(this=expression.copy(), expression=exp.Null()), 3929 exp.Is(this=search.copy(), expression=exp.Null()), 3930 copy=False, 3931 ), 3932 copy=False, 3933 ) 3934 ifs.append(exp.If(this=cond, true=result)) 3935 3936 return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None) 3937 3938 def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]: 3939 self._match_text_seq("KEY") 3940 key = self._parse_field() 3941 self._match(TokenType.COLON) 3942 self._match_text_seq("VALUE") 3943 value = self._parse_field() 3944 3945 if not key and not value: 3946 return None 3947 return self.expression(exp.JSONKeyValue, this=key, expression=value) 3948 3949 def _parse_json_object(self) -> exp.JSONObject: 3950 star = self._parse_star() 3951 expressions = [star] if star else self._parse_csv(self._parse_json_key_value) 3952 3953 null_handling = None 3954 if self._match_text_seq("NULL", "ON", "NULL"): 3955 null_handling = "NULL ON NULL" 3956 elif self._match_text_seq("ABSENT", "ON", "NULL"): 3957 null_handling = "ABSENT ON NULL" 3958 3959 unique_keys = None 3960 if self._match_text_seq("WITH", "UNIQUE"): 3961 unique_keys = True 3962 elif self._match_text_seq("WITHOUT", "UNIQUE"): 3963 unique_keys = False 3964 3965 self._match_text_seq("KEYS") 3966 3967 return_type = self._match_text_seq("RETURNING") and self._parse_type() 3968 format_json = self._match_text_seq("FORMAT", "JSON") 3969 encoding = self._match_text_seq("ENCODING") and self._parse_var() 3970 3971 return self.expression( 3972 exp.JSONObject, 3973 expressions=expressions, 3974 null_handling=null_handling, 3975 unique_keys=unique_keys, 3976 return_type=return_type, 3977 format_json=format_json, 3978 encoding=encoding, 3979 ) 3980 3981 def _parse_logarithm(self) -> exp.Func: 3982 # Default argument order is base, expression 3983 args = self._parse_csv(self._parse_range) 3984 3985 if len(args) > 1: 3986 if not self.LOG_BASE_FIRST: 3987 args.reverse() 3988 return exp.Log.from_arg_list(args) 3989 3990 return self.expression( 3991 exp.Ln if self.LOG_DEFAULTS_TO_LN else exp.Log, this=seq_get(args, 0) 3992 ) 3993 3994 def _parse_match_against(self) -> exp.MatchAgainst: 3995 expressions = self._parse_csv(self._parse_column) 3996 3997 self._match_text_seq(")", "AGAINST", "(") 3998 3999 this = self._parse_string() 4000 4001 if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"): 4002 modifier = "IN NATURAL LANGUAGE MODE" 4003 if self._match_text_seq("WITH", "QUERY", "EXPANSION"): 4004 modifier = f"{modifier} WITH QUERY EXPANSION" 4005 elif self._match_text_seq("IN", "BOOLEAN", "MODE"): 4006 modifier = "IN BOOLEAN MODE" 4007 elif self._match_text_seq("WITH", "QUERY", "EXPANSION"): 4008 modifier = "WITH QUERY EXPANSION" 4009 
    # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16
    def _parse_open_json(self) -> exp.OpenJSON:
        this = self._parse_bitwise()
        path = self._match(TokenType.COMMA) and self._parse_string()

        def _parse_open_json_column_def() -> exp.OpenJSONColumnDef:
            this = self._parse_field(any_token=True)
            kind = self._parse_types()
            path = self._parse_string()
            as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON)

            return self.expression(
                exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json
            )

        expressions = None
        if self._match_pair(TokenType.R_PAREN, TokenType.WITH):
            self._match_l_paren()
            expressions = self._parse_csv(_parse_open_json_column_def)

        return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions)

    def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition:
        args = self._parse_csv(self._parse_bitwise)

        if self._match(TokenType.IN):
            return self.expression(
                exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0)
            )

        if haystack_first:
            haystack = seq_get(args, 0)
            needle = seq_get(args, 1)
        else:
            needle = seq_get(args, 0)
            haystack = seq_get(args, 1)

        return self.expression(
            exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2)
        )

    def _parse_join_hint(self, func_name: str) -> exp.JoinHint:
        args = self._parse_csv(self._parse_table)
        return exp.JoinHint(this=func_name.upper(), expressions=args)

    def _parse_substring(self) -> exp.Substring:
        # Postgres supports the form: substring(string [from int] [for int])
        # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6
        args = self._parse_csv(self._parse_bitwise)

        if self._match(TokenType.FROM):
            args.append(self._parse_bitwise())
            if self._match(TokenType.FOR):
                args.append(self._parse_bitwise())

        return self.validate_expression(exp.Substring.from_arg_list(args), args)

    def _parse_trim(self) -> exp.Trim:
        # https://www.w3resource.com/sql/character-functions/trim.php
        # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html
        position = None
        collation = None

        if self._match_texts(self.TRIM_TYPES):
            position = self._prev.text.upper()

        expression = self._parse_bitwise()
        if self._match_set((TokenType.FROM, TokenType.COMMA)):
            this = self._parse_bitwise()
        else:
            this = expression
            expression = None

        if self._match(TokenType.COLLATE):
            collation = self._parse_bitwise()

        return self.expression(
            exp.Trim, this=this, position=position, expression=expression, collation=collation
        )

    def _parse_window_clause(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]:
        return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window)

    def _parse_named_window(self) -> t.Optional[exp.Expression]:
        return self._parse_window(self._parse_id_var(), alias=True)
    def _parse_respect_or_ignore_nulls(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        if self._match_text_seq("IGNORE", "NULLS"):
            return self.expression(exp.IgnoreNulls, this=this)
        if self._match_text_seq("RESPECT", "NULLS"):
            return self.expression(exp.RespectNulls, this=this)
        return this

    def _parse_window(
        self, this: t.Optional[exp.Expression], alias: bool = False
    ) -> t.Optional[exp.Expression]:
        if self._match_pair(TokenType.FILTER, TokenType.L_PAREN):
            self._match(TokenType.WHERE)
            this = self.expression(
                exp.Filter, this=this, expression=self._parse_where(skip_where_token=True)
            )
            self._match_r_paren()

        # T-SQL allows the OVER (...) syntax after WITHIN GROUP.
        # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16
        if self._match_text_seq("WITHIN", "GROUP"):
            order = self._parse_wrapped(self._parse_order)
            this = self.expression(exp.WithinGroup, this=this, expression=order)

        # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER
        # Some dialects choose to implement and some do not.
        # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html

        # There is some code above in _parse_lambda that handles
        #   SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ...

        # The below changes handle
        #   SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ...

        # Oracle allows both formats
        # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html)
        # and Snowflake chose to do the same for familiarity
        # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes
        this = self._parse_respect_or_ignore_nulls(this)

        # bigquery select from window x AS (partition by ...)
        if alias:
            over = None
            self._match(TokenType.ALIAS)
        elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS):
            return this
        else:
            over = self._prev.text.upper()

        if not self._match(TokenType.L_PAREN):
            return self.expression(
                exp.Window, this=this, alias=self._parse_id_var(False), over=over
            )

        window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS)

        first = self._match(TokenType.FIRST)
        if self._match_text_seq("LAST"):
            first = False

        partition = self._parse_partition_by()
        order = self._parse_order()
        kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text

        if kind:
            self._match(TokenType.BETWEEN)
            start = self._parse_window_spec()
            self._match(TokenType.AND)
            end = self._parse_window_spec()

            spec = self.expression(
                exp.WindowSpec,
                kind=kind,
                start=start["value"],
                start_side=start["side"],
                end=end["value"],
                end_side=end["side"],
            )
        else:
            spec = None

        self._match_r_paren()

        window = self.expression(
            exp.Window,
            this=this,
            partition_by=partition,
            order=order,
            spec=spec,
            alias=window_alias,
            over=over,
            first=first,
        )

        # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...)
        if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False):
            return self._parse_window(window, alias=alias)

        return window
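    # Example of the "trailing" null-ordering form handled just above (a
    # sketch; the AST shape shown is approximate):
    #
    #   import sqlglot
    #   ast = sqlglot.parse_one("SELECT FIRST_VALUE(x) IGNORE NULLS OVER (ORDER BY y) FROM t")
    #   # The wrapper ends up inside the window's `this`:
    #   # Window(this=IgnoreNulls(this=FirstValue(this=Column(...))), order=Order(...))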
    def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]:
        self._match(TokenType.BETWEEN)

        return {
            "value": (
                (self._match_text_seq("UNBOUNDED") and "UNBOUNDED")
                or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW")
                or self._parse_bitwise()
            ),
            "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text,
        }

    def _parse_alias(
        self, this: t.Optional[exp.Expression], explicit: bool = False
    ) -> t.Optional[exp.Expression]:
        any_token = self._match(TokenType.ALIAS)

        if explicit and not any_token:
            return this

        if self._match(TokenType.L_PAREN):
            aliases = self.expression(
                exp.Aliases,
                this=this,
                expressions=self._parse_csv(lambda: self._parse_id_var(any_token)),
            )
            self._match_r_paren(aliases)
            return aliases

        alias = self._parse_id_var(any_token)

        if alias:
            return self.expression(exp.Alias, this=this, alias=alias)

        return this

    def _parse_id_var(
        self,
        any_token: bool = True,
        tokens: t.Optional[t.Collection[TokenType]] = None,
    ) -> t.Optional[exp.Expression]:
        identifier = self._parse_identifier()

        if identifier:
            return identifier

        if (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS):
            quoted = self._prev.token_type == TokenType.STRING
            return exp.Identifier(this=self._prev.text, quoted=quoted)

        return None

    def _parse_string(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.STRING):
            return self.PRIMARY_PARSERS[TokenType.STRING](self, self._prev)
        return self._parse_placeholder()

    def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]:
        return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True)

    def _parse_number(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.NUMBER):
            return self.PRIMARY_PARSERS[TokenType.NUMBER](self, self._prev)
        return self._parse_placeholder()

    def _parse_identifier(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.IDENTIFIER):
            return self.expression(exp.Identifier, this=self._prev.text, quoted=True)
        return self._parse_placeholder()

    def _parse_var(
        self, any_token: bool = False, tokens: t.Optional[t.Collection[TokenType]] = None
    ) -> t.Optional[exp.Expression]:
        if (
            (any_token and self._advance_any())
            or self._match(TokenType.VAR)
            or (self._match_set(tokens) if tokens else False)
        ):
            return self.expression(exp.Var, this=self._prev.text)
        return self._parse_placeholder()

    def _advance_any(self) -> t.Optional[Token]:
        if self._curr and self._curr.token_type not in self.RESERVED_KEYWORDS:
            self._advance()
            return self._prev
        return None

    def _parse_var_or_string(self) -> t.Optional[exp.Expression]:
        return self._parse_var() or self._parse_string()

    def _parse_null(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.NULL):
            return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev)
        return None

    def _parse_boolean(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.TRUE):
            return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev)
        if self._match(TokenType.FALSE):
            return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev)
        return None
    def _parse_star(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.STAR):
            return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev)
        return None

    def _parse_parameter(self) -> exp.Parameter:
        wrapped = self._match(TokenType.L_BRACE)
        this = self._parse_var() or self._parse_identifier() or self._parse_primary()
        self._match(TokenType.R_BRACE)
        return self.expression(exp.Parameter, this=this, wrapped=wrapped)

    def _parse_placeholder(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.PLACEHOLDER_PARSERS):
            placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self)
            if placeholder:
                return placeholder
            self._advance(-1)
        return None

    def _parse_except(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]:
        if not self._match(TokenType.EXCEPT):
            return None
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_column)
        return self._parse_csv(self._parse_column)

    def _parse_replace(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]:
        if not self._match(TokenType.REPLACE):
            return None
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_expression)
        return self._parse_expressions()

    def _parse_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA
    ) -> t.List[t.Optional[exp.Expression]]:
        parse_result = parse_method()
        items = [parse_result] if parse_result is not None else []

        while self._match(sep):
            self._add_comments(parse_result)
            parse_result = parse_method()
            if parse_result is not None:
                items.append(parse_result)

        return items

    def _parse_tokens(
        self, parse_method: t.Callable, expressions: t.Dict
    ) -> t.Optional[exp.Expression]:
        this = parse_method()

        while self._match_set(expressions):
            this = self.expression(
                expressions[self._prev.token_type],
                this=this,
                comments=self._prev_comments,
                expression=parse_method(),
            )

        return this

    def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[t.Optional[exp.Expression]]:
        return self._parse_wrapped_csv(self._parse_id_var, optional=optional)

    def _parse_wrapped_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False
    ) -> t.List[t.Optional[exp.Expression]]:
        return self._parse_wrapped(
            lambda: self._parse_csv(parse_method, sep=sep), optional=optional
        )

    def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any:
        wrapped = self._match(TokenType.L_PAREN)
        if not wrapped and not optional:
            self.raise_error("Expecting (")
        parse_result = parse_method()
        if wrapped:
            self._match_r_paren()
        return parse_result

    def _parse_expressions(self) -> t.List[t.Optional[exp.Expression]]:
        return self._parse_csv(self._parse_expression)

    def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]:
        return self._parse_select() or self._parse_set_operations(
            self._parse_expression() if alias else self._parse_conjunction()
        )
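    # The _parse_csv/_parse_wrapped helpers above are the grammar's generic
    # "<item> (, <item>)*" and "( ... )" building blocks. For instance, the
    # BigQuery-style star modifier below exercises _parse_except ->
    # _parse_wrapped_csv(_parse_column) (a sketch, for illustration):
    #
    #   import sqlglot
    #   sqlglot.parse_one("SELECT * EXCEPT (a, b) FROM t", read="bigquery")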
    def _parse_ddl_select(self) -> t.Optional[exp.Expression]:
        return self._parse_query_modifiers(
            self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False))
        )

    def _parse_transaction(self) -> exp.Transaction | exp.Command:
        this = None
        if self._match_texts(self.TRANSACTION_KIND):
            this = self._prev.text

        self._match_texts({"TRANSACTION", "WORK"})

        modes = []
        while True:
            mode = []
            while self._match(TokenType.VAR):
                mode.append(self._prev.text)

            if mode:
                modes.append(" ".join(mode))
            if not self._match(TokenType.COMMA):
                break

        return self.expression(exp.Transaction, this=this, modes=modes)

    def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback:
        chain = None
        savepoint = None
        is_rollback = self._prev.token_type == TokenType.ROLLBACK

        self._match_texts({"TRANSACTION", "WORK"})

        if self._match_text_seq("TO"):
            self._match_text_seq("SAVEPOINT")
            savepoint = self._parse_id_var()

        if self._match(TokenType.AND):
            chain = not self._match_text_seq("NO")
            self._match_text_seq("CHAIN")

        if is_rollback:
            return self.expression(exp.Rollback, savepoint=savepoint)

        return self.expression(exp.Commit, chain=chain)

    def _parse_add_column(self) -> t.Optional[exp.Expression]:
        if not self._match_text_seq("ADD"):
            return None

        self._match(TokenType.COLUMN)
        exists_column = self._parse_exists(not_=True)
        expression = self._parse_column_def(self._parse_field(any_token=True))

        if expression:
            expression.set("exists", exists_column)

            # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns
            if self._match_texts(("FIRST", "AFTER")):
                position = self._prev.text
                column_position = self.expression(
                    exp.ColumnPosition, this=self._parse_column(), position=position
                )
                expression.set("position", column_position)

        return expression

    def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]:
        drop = self._match(TokenType.DROP) and self._parse_drop()
        if drop and not isinstance(drop, exp.Command):
            drop.set("kind", drop.args.get("kind", "COLUMN"))
        return drop

    # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html
    def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition:
        return self.expression(
            exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists
        )

    def _parse_add_constraint(self) -> exp.AddConstraint:
        this = None
        kind = self._prev.token_type

        if kind == TokenType.CONSTRAINT:
            this = self._parse_id_var()

            if self._match_text_seq("CHECK"):
                expression = self._parse_wrapped(self._parse_conjunction)
                enforced = self._match_text_seq("ENFORCED")

                return self.expression(
                    exp.AddConstraint, this=this, expression=expression, enforced=enforced
                )

        if kind == TokenType.FOREIGN_KEY or self._match(TokenType.FOREIGN_KEY):
            expression = self._parse_foreign_key()
        elif kind == TokenType.PRIMARY_KEY or self._match(TokenType.PRIMARY_KEY):
            expression = self._parse_primary_key()
        else:
            expression = None

        return self.expression(exp.AddConstraint, this=this, expression=expression)
    def _parse_alter_table_add(self) -> t.List[t.Optional[exp.Expression]]:
        index = self._index - 1

        if self._match_set(self.ADD_CONSTRAINT_TOKENS):
            return self._parse_csv(self._parse_add_constraint)

        self._retreat(index)
        return self._parse_csv(self._parse_add_column)

    def _parse_alter_table_alter(self) -> exp.AlterColumn:
        self._match(TokenType.COLUMN)
        column = self._parse_field(any_token=True)

        if self._match_pair(TokenType.DROP, TokenType.DEFAULT):
            return self.expression(exp.AlterColumn, this=column, drop=True)
        if self._match_pair(TokenType.SET, TokenType.DEFAULT):
            return self.expression(exp.AlterColumn, this=column, default=self._parse_conjunction())

        self._match_text_seq("SET", "DATA")
        return self.expression(
            exp.AlterColumn,
            this=column,
            dtype=self._match_text_seq("TYPE") and self._parse_types(),
            collate=self._match(TokenType.COLLATE) and self._parse_term(),
            using=self._match(TokenType.USING) and self._parse_conjunction(),
        )

    def _parse_alter_table_drop(self) -> t.List[t.Optional[exp.Expression]]:
        index = self._index - 1

        partition_exists = self._parse_exists()
        if self._match(TokenType.PARTITION, advance=False):
            return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists))

        self._retreat(index)
        return self._parse_csv(self._parse_drop_column)

    def _parse_alter_table_rename(self) -> exp.RenameTable:
        self._match_text_seq("TO")
        return self.expression(exp.RenameTable, this=self._parse_table(schema=True))

    def _parse_alter(self) -> exp.AlterTable | exp.Command:
        start = self._prev

        if not self._match(TokenType.TABLE):
            return self._parse_as_command(start)

        exists = self._parse_exists()
        this = self._parse_table(schema=True)

        if self._next:
            self._advance()

        parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None
        if parser:
            actions = ensure_list(parser(self))

            if not self._curr:
                return self.expression(
                    exp.AlterTable,
                    this=this,
                    exists=exists,
                    actions=actions,
                )
        return self._parse_as_command(start)

    def _parse_merge(self) -> exp.Merge:
        self._match(TokenType.INTO)
        target = self._parse_table()

        self._match(TokenType.USING)
        using = self._parse_table()

        self._match(TokenType.ON)
        on = self._parse_conjunction()

        whens = []
        while self._match(TokenType.WHEN):
            matched = not self._match(TokenType.NOT)
            self._match_text_seq("MATCHED")
            source = (
                False
                if self._match_text_seq("BY", "TARGET")
                else self._match_text_seq("BY", "SOURCE")
            )
            condition = self._parse_conjunction() if self._match(TokenType.AND) else None

            self._match(TokenType.THEN)

            if self._match(TokenType.INSERT):
                _this = self._parse_star()
                if _this:
                    then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=_this)
                else:
                    then = self.expression(
                        exp.Insert,
                        this=self._parse_value(),
                        expression=self._match(TokenType.VALUES) and self._parse_value(),
                    )
            elif self._match(TokenType.UPDATE):
                expressions = self._parse_star()
                if expressions:
                    then = self.expression(exp.Update, expressions=expressions)
                else:
                    then = self.expression(
                        exp.Update,
                        expressions=self._match(TokenType.SET)
                        and self._parse_csv(self._parse_equality),
                    )
            elif self._match(TokenType.DELETE):
                then = self.expression(exp.Var, this=self._prev.text)
            else:
                then = None

            whens.append(
                self.expression(
                    exp.When,
                    matched=matched,
                    source=source,
                    condition=condition,
                    then=then,
                )
            )

        return self.expression(
            exp.Merge,
            this=target,
            using=using,
            on=on,
            expressions=whens,
        )
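    # Example input for _parse_merge (a sketch; the node layout shown is
    # approximate):
    #
    #   import sqlglot
    #   sqlglot.parse_one(
    #       "MERGE INTO t USING s ON t.id = s.id "
    #       "WHEN MATCHED THEN UPDATE SET t.v = s.v "
    #       "WHEN NOT MATCHED THEN INSERT (id, v) VALUES (s.id, s.v)"
    #   )
    #   # -> Merge(this=Table(t), using=Table(s), on=EQ(...), expressions=[When(...), When(...)])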
    def _parse_show(self) -> t.Optional[exp.Expression]:
        parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE)
        if parser:
            return parser(self)
        self._advance()
        return self.expression(exp.Show, this=self._prev.text.upper())

    def _parse_set_item_assignment(
        self, kind: t.Optional[str] = None
    ) -> t.Optional[exp.Expression]:
        index = self._index

        if kind in {"GLOBAL", "SESSION"} and self._match_text_seq("TRANSACTION"):
            return self._parse_set_transaction(global_=kind == "GLOBAL")

        left = self._parse_primary() or self._parse_id_var()

        if not self._match_texts(("=", "TO")):
            self._retreat(index)
            return None

        right = self._parse_statement() or self._parse_id_var()
        this = self.expression(exp.EQ, this=left, expression=right)

        return self.expression(exp.SetItem, this=this, kind=kind)

    def _parse_set_transaction(self, global_: bool = False) -> exp.Expression:
        self._match_text_seq("TRANSACTION")
        characteristics = self._parse_csv(
            lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS)
        )
        return self.expression(
            exp.SetItem,
            expressions=characteristics,
            kind="TRANSACTION",
            **{"global": global_},  # type: ignore
        )

    def _parse_set_item(self) -> t.Optional[exp.Expression]:
        parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE)
        return parser(self) if parser else self._parse_set_item_assignment(kind=None)

    def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command:
        index = self._index
        set_ = self.expression(
            exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag
        )

        if self._curr:
            self._retreat(index)
            return self._parse_as_command(self._prev)

        return set_

    def _parse_var_from_options(self, options: t.Collection[str]) -> t.Optional[exp.Var]:
        for option in options:
            if self._match_text_seq(*option.split(" ")):
                return exp.var(option)
        return None

    def _parse_as_command(self, start: Token) -> exp.Command:
        while self._curr:
            self._advance()
        text = self._find_sql(start, self._prev)
        size = len(start.text)
        return exp.Command(this=text[:size], expression=text[size:])

    def _parse_dict_property(self, this: str) -> exp.DictProperty:
        settings = []

        self._match_l_paren()
        kind = self._parse_id_var()

        if self._match(TokenType.L_PAREN):
            while True:
                key = self._parse_id_var()
                value = self._parse_primary()

                if not key and value is None:
                    break
                settings.append(self.expression(exp.DictSubProperty, this=key, value=value))
            self._match(TokenType.R_PAREN)

        self._match_r_paren()

        return self.expression(
            exp.DictProperty,
            this=this,
            kind=kind.this if kind else None,
            settings=settings,
        )

    def _parse_dict_range(self, this: str) -> exp.DictRange:
        self._match_l_paren()
        has_min = self._match_text_seq("MIN")
        if has_min:
            min = self._parse_var() or self._parse_primary()
            self._match_text_seq("MAX")
            max = self._parse_var() or self._parse_primary()
        else:
            max = self._parse_var() or self._parse_primary()
            min = exp.Literal.number(0)
        self._match_r_paren()
        return self.expression(exp.DictRange, this=this, min=min, max=max)
    def _find_parser(
        self, parsers: t.Dict[str, t.Callable], trie: t.Dict
    ) -> t.Optional[t.Callable]:
        if not self._curr:
            return None

        index = self._index
        this = []
        while True:
            # The current token might be multiple words
            curr = self._curr.text.upper()
            key = curr.split(" ")
            this.append(curr)

            self._advance()
            result, trie = in_trie(trie, key)
            if result == TrieResult.FAILED:
                break

            if result == TrieResult.EXISTS:
                subparser = parsers[" ".join(this)]
                return subparser

        self._retreat(index)
        return None

    def _match(self, token_type, advance=True, expression=None):
        if not self._curr:
            return None

        if self._curr.token_type == token_type:
            if advance:
                self._advance()
            self._add_comments(expression)
            return True

        return None

    def _match_set(self, types, advance=True):
        if not self._curr:
            return None

        if self._curr.token_type in types:
            if advance:
                self._advance()
            return True

        return None

    def _match_pair(self, token_type_a, token_type_b, advance=True):
        if not self._curr or not self._next:
            return None

        if self._curr.token_type == token_type_a and self._next.token_type == token_type_b:
            if advance:
                self._advance(2)
            return True

        return None

    def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None:
        if not self._match(TokenType.L_PAREN, expression=expression):
            self.raise_error("Expecting (")

    def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None:
        if not self._match(TokenType.R_PAREN, expression=expression):
            self.raise_error("Expecting )")

    def _match_texts(self, texts, advance=True):
        if self._curr and self._curr.text.upper() in texts:
            if advance:
                self._advance()
            return True
        return False

    def _match_text_seq(self, *texts, advance=True):
        index = self._index
        for text in texts:
            if self._curr and self._curr.text.upper() == text:
                self._advance()
            else:
                self._retreat(index)
                return False

        if not advance:
            self._retreat(index)

        return True
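    # The _match* helpers above are the parser's lookahead/consume primitives:
    # they return a truthy value and advance past the matched tokens on a hit,
    # or leave the cursor where it was (retreating if needed) on a miss. A
    # sketch of the usual pattern in a parse method (hypothetical, for
    # illustration only):
    #
    #   def _parse_example(self):
    #       if self._match_text_seq("MY", "KEYWORD"):  # consumes both words or neither
    #           return self.expression(exp.Var, this="MY KEYWORD")
    #       return None  # cursor untouched, so the caller can try another rule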
    @t.overload
    def _replace_columns_with_dots(self, this: exp.Expression) -> exp.Expression:
        ...

    @t.overload
    def _replace_columns_with_dots(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        ...

    def _replace_columns_with_dots(self, this):
        if isinstance(this, exp.Dot):
            exp.replace_children(this, self._replace_columns_with_dots)
        elif isinstance(this, exp.Column):
            exp.replace_children(this, self._replace_columns_with_dots)
            table = this.args.get("table")
            this = (
                self.expression(exp.Dot, this=table, expression=this.this) if table else this.this
            )

        return this

    def _replace_lambda(
        self, node: t.Optional[exp.Expression], lambda_variables: t.Set[str]
    ) -> t.Optional[exp.Expression]:
        if not node:
            return node

        for column in node.find_all(exp.Column):
            if column.parts[0].name in lambda_variables:
                dot_or_id = column.to_dot() if column.table else column.this
                parent = column.parent

                while isinstance(parent, exp.Dot):
                    if not isinstance(parent.parent, exp.Dot):
                        parent.replace(dot_or_id)
                        break
                    parent = parent.parent
                else:
                    if column is node:
                        node = dot_or_id
                    else:
                        column.replace(dot_or_id)
        return node
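The _replace_lambda pass above is what keeps higher-order function arguments from being mistaken for column references. A minimal sketch of the observable effect (F is an arbitrary, anonymous function name; only the lambda matters):

import sqlglot
from sqlglot import exp

ast = sqlglot.parse_one("SELECT F(x -> x + 1) FROM t")
lamb = ast.find(exp.Lambda)
# Within the lambda body, "x" is a plain Identifier rather than a Column,
# because _replace_lambda rewrote every reference to the lambda's variables.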
    def __init__(
        self,
        error_level: t.Optional[ErrorLevel] = None,
        error_message_context: int = 100,
        max_errors: int = 3,
    ):
        self.error_level = error_level or ErrorLevel.IMMEDIATE
        self.error_message_context = error_message_context
        self.max_errors = max_errors
        self.reset()
    def parse(
        self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens and returns a list of syntax trees, one tree
        per parsed SQL statement.

        Args:
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The list of the produced syntax trees.
        """
        return self._parse(
            parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
        )
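A minimal end-to-end sketch of driving the parser by hand (normally you would go through sqlglot.parse_one or a Dialect, which wire the Tokenizer and Parser together for you):

from sqlglot.parser import Parser
from sqlglot.tokens import Tokenizer

sql = "SELECT a FROM t"
parser = Parser()
expressions = parser.parse(Tokenizer().tokenize(sql), sql=sql)
# expressions is a list with one Select tree for the single statement above.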
    def parse_into(
        self,
        expression_types: exp.IntoType,
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens into a given Expression type. If a collection of Expression
        types is given instead, this method will try to parse the token list into each one
        of them, stopping at the first for which the parsing succeeds.

        Args:
            expression_types: The expression type(s) to try and parse the token list into.
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The target Expression.
        """
        errors = []
        for expression_type in ensure_list(expression_types):
            parser = self.EXPRESSION_PARSERS.get(expression_type)
            if not parser:
                raise TypeError(f"No parser registered for {expression_type}")

            try:
                return self._parse(parser, raw_tokens, sql)
            except ParseError as e:
                e.errors[0]["into_expression"] = expression_type
                errors.append(e)

        raise ParseError(
            f"Failed to parse '{sql or raw_tokens}' into {expression_types}",
            errors=merge_errors(errors),
        ) from errors[-1]
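For example, to parse a bare condition rather than a full statement (a sketch, assuming exp.Condition is among the registered EXPRESSION_PARSERS keys):

from sqlglot import exp
from sqlglot.parser import Parser
from sqlglot.tokens import Tokenizer

sql = "a = 1 AND b < 2"
condition = Parser().parse_into(exp.Condition, Tokenizer().tokenize(sql), sql=sql)[0]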
    def check_errors(self) -> None:
        """Logs or raises any found errors, depending on the chosen error level setting."""
        if self.error_level == ErrorLevel.WARN:
            for error in self.errors:
                logger.error(str(error))
        elif self.error_level == ErrorLevel.RAISE and self.errors:
            raise ParseError(
                concat_messages(self.errors, self.max_errors),
                errors=merge_errors(self.errors),
            )
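For instance, with ErrorLevel.WARN the parse finishes and check_errors only logs the collected errors (a sketch; "SELECT 1 +" is just an example of malformed input):

from sqlglot.errors import ErrorLevel
from sqlglot.parser import Parser
from sqlglot.tokens import Tokenizer

sql = "SELECT 1 +"
parser = Parser(error_level=ErrorLevel.WARN)
parser.parse(Tokenizer().tokenize(sql), sql=sql)  # errors go to the "sqlglot" logger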
    def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
        """
        Appends an error in the list of recorded errors or raises it, depending on the chosen
        error level setting.
        """
        token = token or self._curr or self._prev or Token.string("")
        start = token.start
        end = token.end + 1
        start_context = self.sql[max(start - self.error_message_context, 0) : start]
        highlight = self.sql[start:end]
        end_context = self.sql[end : end + self.error_message_context]

        error = ParseError.new(
            f"{message}. Line {token.line}, Col: {token.col}.\n"
            f"  {start_context}\033[4m{highlight}\033[0m{end_context}",
            description=message,
            line=token.line,
            col=token.col,
            start_context=start_context,
            highlight=highlight,
            end_context=end_context,
        )

        if self.error_level == ErrorLevel.IMMEDIATE:
            raise error

        self.errors.append(error)
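The structured fields passed to ParseError.new above are what callers can inspect when a parse fails (a sketch; again "SELECT 1 +" just stands in for invalid SQL):

from sqlglot import parse_one
from sqlglot.errors import ParseError

try:
    parse_one("SELECT 1 +")
except ParseError as e:
    # Each recorded error carries description, line, col, highlight, etc.
    print(e.errors[0]["line"], e.errors[0]["description"])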
    def expression(
        self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs
    ) -> E:
        """
        Creates a new, validated Expression.

        Args:
            exp_class: The expression class to instantiate.
            comments: An optional list of comments to attach to the expression.
            kwargs: The arguments to set for the expression along with their respective values.

        Returns:
            The target expression.
        """
        instance = exp_class(**kwargs)
        instance.add_comments(comments) if comments else self._add_comments(instance)
        return self.validate_expression(instance)
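Every _parse_* method above funnels node construction through this factory. A sketch of the idiom, as it would appear in a hypothetical Parser subclass (the method name is invented):

def _parse_my_keyword(self):
    # self.expression builds the node, attaches any pending comments, and
    # validates mandatory args via validate_expression before returning it.
    return self.expression(exp.Var, this=self._prev.text)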
    def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E:
        """
        Validates an Expression, making sure that all its mandatory arguments are set.

        Args:
            expression: The expression to validate.
            args: An optional list of items that was used to instantiate the expression,
                if it's a Func.

        Returns:
            The validated expression.
        """
        if self.error_level != ErrorLevel.IGNORE:
            for error_message in expression.error_messages(args):
                self.raise_error(error_message)

        return expression
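Since validation is skipped when error_level is ErrorLevel.IGNORE, a parse that would otherwise raise can return a structurally incomplete tree instead (a sketch):

from sqlglot.errors import ErrorLevel
from sqlglot.parser import Parser
from sqlglot.tokens import Tokenizer

sql = "SELECT 1 +"
parser = Parser(error_level=ErrorLevel.IGNORE)
trees = parser.parse(Tokenizer().tokenize(sql), sql=sql)
# trees[0] should be a Select whose projection is an Add node missing its
# right-hand `expression` arg; no validation error is raised or recorded.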