sqlglot.parser
from __future__ import annotations

import logging
import typing as t
from collections import defaultdict

from sqlglot import exp
from sqlglot.errors import ErrorLevel, ParseError, concat_messages, merge_errors
from sqlglot.helper import apply_index_offset, ensure_list, seq_get
from sqlglot.time import format_time
from sqlglot.tokens import Token, Tokenizer, TokenType
from sqlglot.trie import TrieResult, in_trie, new_trie

if t.TYPE_CHECKING:
    from sqlglot._typing import E

logger = logging.getLogger("sqlglot")


def parse_var_map(args: t.List) -> exp.StarMap | exp.VarMap:
    if len(args) == 1 and args[0].is_star:
        return exp.StarMap(this=args[0])

    keys = []
    values = []
    for i in range(0, len(args), 2):
        keys.append(args[i])
        values.append(args[i + 1])

    return exp.VarMap(
        keys=exp.Array(expressions=keys),
        values=exp.Array(expressions=values),
    )
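# Illustrative note (not part of the original source): VAR_MAP's argument list
# alternates keys and values, so an assumed call like
#
#     parse_var_map([k1, v1, k2, v2])
#
# produces exp.VarMap(keys=exp.Array(k1, k2), values=exp.Array(v1, v2)), while
# a single star argument short-circuits to exp.StarMap.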
def parse_like(args: t.List) -> exp.Escape | exp.Like:
    like = exp.Like(this=seq_get(args, 1), expression=seq_get(args, 0))
    return exp.Escape(this=like, expression=seq_get(args, 2)) if len(args) > 2 else like


def binary_range_parser(
    expr_type: t.Type[exp.Expression],
) -> t.Callable[[Parser, t.Optional[exp.Expression]], t.Optional[exp.Expression]]:
    return lambda self, this: self._parse_escape(
        self.expression(expr_type, this=this, expression=self._parse_bitwise())
    )


class _Parser(type):
    def __new__(cls, clsname, bases, attrs):
        klass = super().__new__(cls, clsname, bases, attrs)

        klass.SHOW_TRIE = new_trie(key.split(" ") for key in klass.SHOW_PARSERS)
        klass.SET_TRIE = new_trie(key.split(" ") for key in klass.SET_PARSERS)

        return klass


class Parser(metaclass=_Parser):
    """
    Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.

    Args:
        error_level: The desired error level.
            Default: ErrorLevel.IMMEDIATE
        error_message_context: Determines the amount of context to capture from a
            query string when displaying the error message (in number of characters).
            Default: 100
        max_errors: Maximum number of error messages to include in a raised ParseError.
            This is only relevant if error_level is ErrorLevel.RAISE.
            Default: 3
    """

    FUNCTIONS: t.Dict[str, t.Callable] = {
        **{name: f.from_arg_list for f in exp.ALL_FUNCTIONS for name in f.sql_names()},
        "DATE_TO_DATE_STR": lambda args: exp.Cast(
            this=seq_get(args, 0),
            to=exp.DataType(this=exp.DataType.Type.TEXT),
        ),
        "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)),
        "LIKE": parse_like,
        "TIME_TO_TIME_STR": lambda args: exp.Cast(
            this=seq_get(args, 0),
            to=exp.DataType(this=exp.DataType.Type.TEXT),
        ),
        "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring(
            this=exp.Cast(
                this=seq_get(args, 0),
                to=exp.DataType(this=exp.DataType.Type.TEXT),
            ),
            start=exp.Literal.number(1),
            length=exp.Literal.number(10),
        ),
        "VAR_MAP": parse_var_map,
    }

    NO_PAREN_FUNCTIONS = {
        TokenType.CURRENT_DATE: exp.CurrentDate,
        TokenType.CURRENT_DATETIME: exp.CurrentDate,
        TokenType.CURRENT_TIME: exp.CurrentTime,
        TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp,
        TokenType.CURRENT_USER: exp.CurrentUser,
    }

    NESTED_TYPE_TOKENS = {
        TokenType.ARRAY,
        TokenType.MAP,
        TokenType.NULLABLE,
        TokenType.STRUCT,
    }

    ENUM_TYPE_TOKENS = {
        TokenType.ENUM,
    }

    TYPE_TOKENS = {
        TokenType.BIT,
        TokenType.BOOLEAN,
        TokenType.TINYINT,
        TokenType.UTINYINT,
        TokenType.SMALLINT,
        TokenType.USMALLINT,
        TokenType.INT,
        TokenType.UINT,
        TokenType.BIGINT,
        TokenType.UBIGINT,
        TokenType.INT128,
        TokenType.UINT128,
        TokenType.INT256,
        TokenType.UINT256,
        TokenType.FLOAT,
        TokenType.DOUBLE,
        TokenType.CHAR,
        TokenType.NCHAR,
        TokenType.VARCHAR,
        TokenType.NVARCHAR,
        TokenType.TEXT,
        TokenType.MEDIUMTEXT,
        TokenType.LONGTEXT,
        TokenType.MEDIUMBLOB,
        TokenType.LONGBLOB,
        TokenType.BINARY,
        TokenType.VARBINARY,
        TokenType.JSON,
        TokenType.JSONB,
        TokenType.INTERVAL,
        TokenType.TIME,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
        TokenType.DATETIME,
        TokenType.DATETIME64,
        TokenType.DATE,
        TokenType.INT4RANGE,
        TokenType.INT4MULTIRANGE,
        TokenType.INT8RANGE,
        TokenType.INT8MULTIRANGE,
        TokenType.NUMRANGE,
        TokenType.NUMMULTIRANGE,
        TokenType.TSRANGE,
        TokenType.TSMULTIRANGE,
        TokenType.TSTZRANGE,
        TokenType.TSTZMULTIRANGE,
        TokenType.DATERANGE,
        TokenType.DATEMULTIRANGE,
        TokenType.DECIMAL,
        TokenType.BIGDECIMAL,
        TokenType.UUID,
        TokenType.GEOGRAPHY,
        TokenType.GEOMETRY,
        TokenType.HLLSKETCH,
        TokenType.HSTORE,
        TokenType.PSEUDO_TYPE,
        TokenType.SUPER,
        TokenType.SERIAL,
        TokenType.SMALLSERIAL,
        TokenType.BIGSERIAL,
        TokenType.XML,
        TokenType.UNIQUEIDENTIFIER,
        TokenType.USERDEFINED,
        TokenType.MONEY,
        TokenType.SMALLMONEY,
        TokenType.ROWVERSION,
        TokenType.IMAGE,
        TokenType.VARIANT,
        TokenType.OBJECT,
        TokenType.INET,
        TokenType.ENUM,
        *NESTED_TYPE_TOKENS,
    }

    SUBQUERY_PREDICATES = {
        TokenType.ANY: exp.Any,
        TokenType.ALL: exp.All,
        TokenType.EXISTS: exp.Exists,
        TokenType.SOME: exp.Any,
    }

    RESERVED_KEYWORDS = {
        *Tokenizer.SINGLE_TOKENS.values(),
        TokenType.SELECT,
    }

    DB_CREATABLES = {
        TokenType.DATABASE,
        TokenType.SCHEMA,
        TokenType.TABLE,
        TokenType.VIEW,
        TokenType.DICTIONARY,
    }

    CREATABLES = {
        TokenType.COLUMN,
        TokenType.FUNCTION,
        TokenType.INDEX,
        TokenType.PROCEDURE,
        *DB_CREATABLES,
    }
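    # Illustrative note (not part of the original source): DB_CREATABLES and
    # CREATABLES gate what may follow CREATE/DROP/COMMENT. For example, an
    # assumed round trip through sqlglot's public API,
    #
    #     sqlglot.parse_one("CREATE TABLE t (c INT)")
    #
    # reaches _parse_create below with create_token = TABLE and yields an
    # exp.Create node whose kind is "TABLE".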
    # Tokens that can represent identifiers
    ID_VAR_TOKENS = {
        TokenType.VAR,
        TokenType.ANTI,
        TokenType.APPLY,
        TokenType.ASC,
        TokenType.AUTO_INCREMENT,
        TokenType.BEGIN,
        TokenType.CACHE,
        TokenType.CASE,
        TokenType.COLLATE,
        TokenType.COMMAND,
        TokenType.COMMENT,
        TokenType.COMMIT,
        TokenType.CONSTRAINT,
        TokenType.DEFAULT,
        TokenType.DELETE,
        TokenType.DESC,
        TokenType.DESCRIBE,
        TokenType.DICTIONARY,
        TokenType.DIV,
        TokenType.END,
        TokenType.EXECUTE,
        TokenType.ESCAPE,
        TokenType.FALSE,
        TokenType.FIRST,
        TokenType.FILTER,
        TokenType.FORMAT,
        TokenType.FULL,
        TokenType.IF,
        TokenType.IS,
        TokenType.ISNULL,
        TokenType.INTERVAL,
        TokenType.KEEP,
        TokenType.LEFT,
        TokenType.LOAD,
        TokenType.MERGE,
        TokenType.NATURAL,
        TokenType.NEXT,
        TokenType.OFFSET,
        TokenType.ORDINALITY,
        TokenType.OVERWRITE,
        TokenType.PARTITION,
        TokenType.PERCENT,
        TokenType.PIVOT,
        TokenType.PRAGMA,
        TokenType.RANGE,
        TokenType.REFERENCES,
        TokenType.RIGHT,
        TokenType.ROW,
        TokenType.ROWS,
        TokenType.SEMI,
        TokenType.SET,
        TokenType.SETTINGS,
        TokenType.SHOW,
        TokenType.TEMPORARY,
        TokenType.TOP,
        TokenType.TRUE,
        TokenType.UNIQUE,
        TokenType.UNPIVOT,
        TokenType.UPDATE,
        TokenType.VOLATILE,
        TokenType.WINDOW,
        *CREATABLES,
        *SUBQUERY_PREDICATES,
        *TYPE_TOKENS,
        *NO_PAREN_FUNCTIONS,
    }

    INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END}

    TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - {
        TokenType.APPLY,
        TokenType.ASOF,
        TokenType.FULL,
        TokenType.LEFT,
        TokenType.LOCK,
        TokenType.NATURAL,
        TokenType.OFFSET,
        TokenType.RIGHT,
        TokenType.WINDOW,
    }

    COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS}

    UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET}

    TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"}

    FUNC_TOKENS = {
        TokenType.COMMAND,
        TokenType.CURRENT_DATE,
        TokenType.CURRENT_DATETIME,
        TokenType.CURRENT_TIMESTAMP,
        TokenType.CURRENT_TIME,
        TokenType.CURRENT_USER,
        TokenType.FILTER,
        TokenType.FIRST,
        TokenType.FORMAT,
        TokenType.GLOB,
        TokenType.IDENTIFIER,
        TokenType.INDEX,
        TokenType.ISNULL,
        TokenType.ILIKE,
        TokenType.LIKE,
        TokenType.MERGE,
        TokenType.OFFSET,
        TokenType.PRIMARY_KEY,
        TokenType.RANGE,
        TokenType.REPLACE,
        TokenType.RLIKE,
        TokenType.ROW,
        TokenType.UNNEST,
        TokenType.VAR,
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.DATE,
        TokenType.DATETIME,
        TokenType.TABLE,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.WINDOW,
        TokenType.XOR,
        *TYPE_TOKENS,
        *SUBQUERY_PREDICATES,
    }

    CONJUNCTION = {
        TokenType.AND: exp.And,
        TokenType.OR: exp.Or,
    }

    EQUALITY = {
        TokenType.EQ: exp.EQ,
        TokenType.NEQ: exp.NEQ,
        TokenType.NULLSAFE_EQ: exp.NullSafeEQ,
    }

    COMPARISON = {
        TokenType.GT: exp.GT,
        TokenType.GTE: exp.GTE,
        TokenType.LT: exp.LT,
        TokenType.LTE: exp.LTE,
    }

    BITWISE = {
        TokenType.AMP: exp.BitwiseAnd,
        TokenType.CARET: exp.BitwiseXor,
        TokenType.PIPE: exp.BitwiseOr,
        TokenType.DPIPE: exp.DPipe,
    }

    TERM = {
        TokenType.DASH: exp.Sub,
        TokenType.PLUS: exp.Add,
        TokenType.MOD: exp.Mod,
        TokenType.COLLATE: exp.Collate,
    }

    FACTOR = {
        TokenType.DIV: exp.IntDiv,
        TokenType.LR_ARROW: exp.Distance,
        TokenType.SLASH: exp.Div,
        TokenType.STAR: exp.Mul,
    }
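    # Illustrative note (not part of the original source): CONJUNCTION,
    # EQUALITY, COMPARISON, BITWISE, TERM and FACTOR map tokens to binary
    # expression nodes in increasing binding strength, so "1 + 2 * 3" parses
    # as exp.Add(this=1, expression=exp.Mul(this=2, expression=3)).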
    TIMESTAMPS = {
        TokenType.TIME,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
    }

    SET_OPERATIONS = {
        TokenType.UNION,
        TokenType.INTERSECT,
        TokenType.EXCEPT,
    }

    JOIN_METHODS = {
        TokenType.NATURAL,
        TokenType.ASOF,
    }

    JOIN_SIDES = {
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.FULL,
    }

    JOIN_KINDS = {
        TokenType.INNER,
        TokenType.OUTER,
        TokenType.CROSS,
        TokenType.SEMI,
        TokenType.ANTI,
    }

    JOIN_HINTS: t.Set[str] = set()

    LAMBDAS = {
        TokenType.ARROW: lambda self, expressions: self.expression(
            exp.Lambda,
            this=self._replace_lambda(
                self._parse_conjunction(),
                {node.name for node in expressions},
            ),
            expressions=expressions,
        ),
        TokenType.FARROW: lambda self, expressions: self.expression(
            exp.Kwarg,
            this=exp.var(expressions[0].name),
            expression=self._parse_conjunction(),
        ),
    }

    COLUMN_OPERATORS = {
        TokenType.DOT: None,
        TokenType.DCOLON: lambda self, this, to: self.expression(
            exp.Cast if self.STRICT_CAST else exp.TryCast,
            this=this,
            to=to,
        ),
        TokenType.ARROW: lambda self, this, path: self.expression(
            exp.JSONExtract,
            this=this,
            expression=path,
        ),
        TokenType.DARROW: lambda self, this, path: self.expression(
            exp.JSONExtractScalar,
            this=this,
            expression=path,
        ),
        TokenType.HASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtract,
            this=this,
            expression=path,
        ),
        TokenType.DHASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtractScalar,
            this=this,
            expression=path,
        ),
        TokenType.PLACEHOLDER: lambda self, this, key: self.expression(
            exp.JSONBContains,
            this=this,
            expression=key,
        ),
    }

    EXPRESSION_PARSERS = {
        exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
        exp.Column: lambda self: self._parse_column(),
        exp.Condition: lambda self: self._parse_conjunction(),
        exp.DataType: lambda self: self._parse_types(),
        exp.Expression: lambda self: self._parse_statement(),
        exp.From: lambda self: self._parse_from(),
        exp.Group: lambda self: self._parse_group(),
        exp.Having: lambda self: self._parse_having(),
        exp.Identifier: lambda self: self._parse_id_var(),
        exp.Join: lambda self: self._parse_join(),
        exp.Lambda: lambda self: self._parse_lambda(),
        exp.Lateral: lambda self: self._parse_lateral(),
        exp.Limit: lambda self: self._parse_limit(),
        exp.Offset: lambda self: self._parse_offset(),
        exp.Order: lambda self: self._parse_order(),
        exp.Ordered: lambda self: self._parse_ordered(),
        exp.Properties: lambda self: self._parse_properties(),
        exp.Qualify: lambda self: self._parse_qualify(),
        exp.Returning: lambda self: self._parse_returning(),
        exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY),
        exp.Table: lambda self: self._parse_table_parts(),
        exp.TableAlias: lambda self: self._parse_table_alias(),
        exp.Where: lambda self: self._parse_where(),
        exp.Window: lambda self: self._parse_named_window(),
        exp.With: lambda self: self._parse_with(),
        "JOIN_TYPE": lambda self: self._parse_join_parts(),
    }
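    # Illustrative note (not part of the original source): EXPRESSION_PARSERS
    # is the table consulted by parse_into (below), so an assumed call like
    #
    #     sqlglot.parse_one("x > 1", into=exp.Condition)
    #
    # dispatches to _parse_conjunction through this mapping.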
    STATEMENT_PARSERS = {
        TokenType.ALTER: lambda self: self._parse_alter(),
        TokenType.BEGIN: lambda self: self._parse_transaction(),
        TokenType.CACHE: lambda self: self._parse_cache(),
        TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(),
        TokenType.COMMENT: lambda self: self._parse_comment(),
        TokenType.CREATE: lambda self: self._parse_create(),
        TokenType.DELETE: lambda self: self._parse_delete(),
        TokenType.DESC: lambda self: self._parse_describe(),
        TokenType.DESCRIBE: lambda self: self._parse_describe(),
        TokenType.DROP: lambda self: self._parse_drop(),
        TokenType.FROM: lambda self: exp.select("*").from_(
            t.cast(exp.From, self._parse_from(skip_from_token=True))
        ),
        TokenType.INSERT: lambda self: self._parse_insert(),
        TokenType.LOAD: lambda self: self._parse_load(),
        TokenType.MERGE: lambda self: self._parse_merge(),
        TokenType.PIVOT: lambda self: self._parse_simplified_pivot(),
        TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()),
        TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(),
        TokenType.SET: lambda self: self._parse_set(),
        TokenType.UNCACHE: lambda self: self._parse_uncache(),
        TokenType.UPDATE: lambda self: self._parse_update(),
        TokenType.USE: lambda self: self.expression(
            exp.Use,
            kind=self._match_texts(("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA"))
            and exp.var(self._prev.text),
            this=self._parse_table(schema=False),
        ),
    }

    UNARY_PARSERS = {
        TokenType.PLUS: lambda self: self._parse_unary(),  # Unary + is handled as a no-op
        TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()),
        TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()),
        TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()),
    }

    PRIMARY_PARSERS = {
        TokenType.STRING: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=True
        ),
        TokenType.NUMBER: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=False
        ),
        TokenType.STAR: lambda self, _: self.expression(
            exp.Star, **{"except": self._parse_except(), "replace": self._parse_replace()}
        ),
        TokenType.NULL: lambda self, _: self.expression(exp.Null),
        TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True),
        TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False),
        TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text),
        TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text),
        TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text),
        TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token),
        TokenType.NATIONAL_STRING: lambda self, token: self.expression(
            exp.National, this=token.text
        ),
        TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text),
        TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(),
    }

    PLACEHOLDER_PARSERS = {
        TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder),
        TokenType.PARAMETER: lambda self: self._parse_parameter(),
        TokenType.COLON: lambda self: self.expression(exp.Placeholder, this=self._prev.text)
        if self._match_set((TokenType.NUMBER, TokenType.VAR))
        else None,
    }
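    # Illustrative note (not part of the original source): under these rules a
    # named placeholder such as ":param" (COLON followed by a VAR token)
    # becomes exp.Placeholder(this="param"), while a bare "?" (PLACEHOLDER
    # token) becomes an anonymous exp.Placeholder.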
    RANGE_PARSERS = {
        TokenType.BETWEEN: lambda self, this: self._parse_between(this),
        TokenType.GLOB: binary_range_parser(exp.Glob),
        TokenType.ILIKE: binary_range_parser(exp.ILike),
        TokenType.IN: lambda self, this: self._parse_in(this),
        TokenType.IRLIKE: binary_range_parser(exp.RegexpILike),
        TokenType.IS: lambda self, this: self._parse_is(this),
        TokenType.LIKE: binary_range_parser(exp.Like),
        TokenType.OVERLAPS: binary_range_parser(exp.Overlaps),
        TokenType.RLIKE: binary_range_parser(exp.RegexpLike),
        TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo),
    }
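    # Illustrative note (not part of the original source): most range
    # operators are built with binary_range_parser, so "x LIKE 'a%'" becomes
    # exp.Like and is wrapped in exp.Escape when an ESCAPE clause follows;
    # BETWEEN, IN and IS get dedicated handlers.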
    PROPERTY_PARSERS: t.Dict[str, t.Callable] = {
        "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty),
        "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty),
        "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(),
        "CHARACTER SET": lambda self: self._parse_character_set(),
        "CHECKSUM": lambda self: self._parse_checksum(),
        "CLUSTER BY": lambda self: self._parse_cluster(),
        "CLUSTERED": lambda self: self._parse_clustered_by(),
        "COLLATE": lambda self: self._parse_property_assignment(exp.CollateProperty),
        "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty),
        "COPY": lambda self: self._parse_copy_property(),
        "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs),
        "DEFINER": lambda self: self._parse_definer(),
        "DETERMINISTIC": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "DISTKEY": lambda self: self._parse_distkey(),
        "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty),
        "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty),
        "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty),
        "EXTERNAL": lambda self: self.expression(exp.ExternalProperty),
        "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs),
        "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "FREESPACE": lambda self: self._parse_freespace(),
        "IMMUTABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs),
        "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty),
        "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"),
        "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"),
        "LIKE": lambda self: self._parse_create_like(),
        "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty),
        "LOCK": lambda self: self._parse_locking(),
        "LOCKING": lambda self: self._parse_locking(),
        "LOG": lambda self, **kwargs: self._parse_log(**kwargs),
        "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty),
        "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs),
        "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True),
        "NO": lambda self: self._parse_no_property(),
        "ON": lambda self: self._parse_on_property(),
        "ORDER BY": lambda self: self._parse_order(skip_order_token=True),
        "PARTITION BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED_BY": lambda self: self._parse_partitioned_by(),
        "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True),
        "RANGE": lambda self: self._parse_dict_range(this="RANGE"),
        "RETURNS": lambda self: self._parse_returns(),
        "ROW": lambda self: self._parse_row(),
        "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty),
        "SET": lambda self: self.expression(exp.SetProperty, multi=False),
        "SETTINGS": lambda self: self.expression(
            exp.SettingsProperty, expressions=self._parse_csv(self._parse_set_item)
        ),
        "SORTKEY": lambda self: self._parse_sortkey(),
        "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"),
        "STABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("STABLE")
        ),
        "STORED": lambda self: self._parse_stored(),
        "TBLPROPERTIES": lambda self: self._parse_wrapped_csv(self._parse_property),
        "TEMP": lambda self: self.expression(exp.TemporaryProperty),
        "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty),
        "TO": lambda self: self._parse_to_table(),
        "TRANSIENT": lambda self: self.expression(exp.TransientProperty),
        "TTL": lambda self: self._parse_ttl(),
        "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "VOLATILE": lambda self: self._parse_volatile_property(),
        "WITH": lambda self: self._parse_with_property(),
    }

    CONSTRAINT_PARSERS = {
        "AUTOINCREMENT": lambda self: self._parse_auto_increment(),
        "AUTO_INCREMENT": lambda self: self._parse_auto_increment(),
        "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False),
        "CHARACTER SET": lambda self: self.expression(
            exp.CharacterSetColumnConstraint, this=self._parse_var_or_string()
        ),
        "CHECK": lambda self: self.expression(
            exp.CheckColumnConstraint, this=self._parse_wrapped(self._parse_conjunction)
        ),
        "COLLATE": lambda self: self.expression(
            exp.CollateColumnConstraint, this=self._parse_var()
        ),
        "COMMENT": lambda self: self.expression(
            exp.CommentColumnConstraint, this=self._parse_string()
        ),
        "COMPRESS": lambda self: self._parse_compress(),
        "DEFAULT": lambda self: self.expression(
            exp.DefaultColumnConstraint, this=self._parse_bitwise()
        ),
        "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()),
        "FOREIGN KEY": lambda self: self._parse_foreign_key(),
        "FORMAT": lambda self: self.expression(
            exp.DateFormatColumnConstraint, this=self._parse_var_or_string()
        ),
        "GENERATED": lambda self: self._parse_generated_as_identity(),
        "IDENTITY": lambda self: self._parse_auto_increment(),
        "INLINE": lambda self: self._parse_inline(),
        "LIKE": lambda self: self._parse_create_like(),
        "NOT": lambda self: self._parse_not_constraint(),
        "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True),
        "ON": lambda self: self._match(TokenType.UPDATE)
        and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function()),
        "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()),
        "PRIMARY KEY": lambda self: self._parse_primary_key(),
        "REFERENCES": lambda self: self._parse_references(match=False),
        "TITLE": lambda self: self.expression(
            exp.TitleColumnConstraint, this=self._parse_var_or_string()
        ),
        "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]),
        "UNIQUE": lambda self: self._parse_unique(),
        "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint),
    }
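    # Illustrative note (not part of the original source): these constraint
    # rules are applied while parsing column definitions, so in an assumed
    # statement like "CREATE TABLE t (x INT NOT NULL DEFAULT 0)" the column
    # collects exp.NotNullColumnConstraint and exp.DefaultColumnConstraint.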
    ALTER_PARSERS = {
        "ADD": lambda self: self._parse_alter_table_add(),
        "ALTER": lambda self: self._parse_alter_table_alter(),
        "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()),
        "DROP": lambda self: self._parse_alter_table_drop(),
        "RENAME": lambda self: self._parse_alter_table_rename(),
    }

    SCHEMA_UNNAMED_CONSTRAINTS = {"CHECK", "FOREIGN KEY", "LIKE", "PRIMARY KEY", "UNIQUE"}

    NO_PAREN_FUNCTION_PARSERS = {
        TokenType.ANY: lambda self: self.expression(exp.Any, this=self._parse_bitwise()),
        TokenType.CASE: lambda self: self._parse_case(),
        TokenType.IF: lambda self: self._parse_if(),
        TokenType.NEXT_VALUE_FOR: lambda self: self.expression(
            exp.NextValueFor,
            this=self._parse_column(),
            order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order),
        ),
    }

    FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"}

    FUNCTION_PARSERS = {
        "ANY_VALUE": lambda self: self._parse_any_value(),
        "CAST": lambda self: self._parse_cast(self.STRICT_CAST),
        "CONCAT": lambda self: self._parse_concat(),
        "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST),
        "DECODE": lambda self: self._parse_decode(),
        "EXTRACT": lambda self: self._parse_extract(),
        "JSON_OBJECT": lambda self: self._parse_json_object(),
        "LOG": lambda self: self._parse_logarithm(),
        "MATCH": lambda self: self._parse_match_against(),
        "OPENJSON": lambda self: self._parse_open_json(),
        "POSITION": lambda self: self._parse_position(),
        "SAFE_CAST": lambda self: self._parse_cast(False),
        "STRING_AGG": lambda self: self._parse_string_agg(),
        "SUBSTRING": lambda self: self._parse_substring(),
        "TRIM": lambda self: self._parse_trim(),
        "TRY_CAST": lambda self: self._parse_cast(False),
        "TRY_CONVERT": lambda self: self._parse_convert(False),
    }

    QUERY_MODIFIER_PARSERS = {
        TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()),
        TokenType.WHERE: lambda self: ("where", self._parse_where()),
        TokenType.GROUP_BY: lambda self: ("group", self._parse_group()),
        TokenType.HAVING: lambda self: ("having", self._parse_having()),
        TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()),
        TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()),
        TokenType.ORDER_BY: lambda self: ("order", self._parse_order()),
        TokenType.LIMIT: lambda self: ("limit", self._parse_limit()),
        TokenType.FETCH: lambda self: ("limit", self._parse_limit()),
        TokenType.OFFSET: lambda self: ("offset", self._parse_offset()),
        TokenType.FOR: lambda self: ("locks", self._parse_locks()),
        TokenType.LOCK: lambda self: ("locks", self._parse_locks()),
        TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)),
        TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)),
        TokenType.CLUSTER_BY: lambda self: (
            "cluster",
            self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
        ),
        TokenType.DISTRIBUTE_BY: lambda self: (
            "distribute",
            self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY),
        ),
        TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)),
    }

    SET_PARSERS = {
        "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"),
        "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"),
        "SESSION": lambda self: self._parse_set_item_assignment("SESSION"),
        "TRANSACTION": lambda self: self._parse_set_transaction(),
    }

    SHOW_PARSERS: t.Dict[str, t.Callable] = {}
    TYPE_LITERAL_PARSERS: t.Dict[exp.DataType.Type, t.Callable] = {}

    MODIFIABLES = (exp.Subquery, exp.Subqueryable, exp.Table)

    DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN}

    PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE}

    TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"}
    TRANSACTION_CHARACTERISTICS = {
        "ISOLATION LEVEL REPEATABLE READ",
        "ISOLATION LEVEL READ COMMITTED",
        "ISOLATION LEVEL READ UNCOMMITTED",
        "ISOLATION LEVEL SERIALIZABLE",
        "READ WRITE",
        "READ ONLY",
    }

    INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"}

    CLONE_KINDS = {"TIMESTAMP", "OFFSET", "STATEMENT"}

    TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE}

    WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS}
    WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER}
    WINDOW_SIDES = {"FOLLOWING", "PRECEDING"}

    ADD_CONSTRAINT_TOKENS = {TokenType.CONSTRAINT, TokenType.PRIMARY_KEY, TokenType.FOREIGN_KEY}

    STRICT_CAST = True

    # A NULL arg in CONCAT yields NULL by default
    CONCAT_NULL_OUTPUTS_STRING = False

    PREFIXED_PIVOT_COLUMNS = False
    IDENTIFY_PIVOT_STRINGS = False

    LOG_BASE_FIRST = True
    LOG_DEFAULTS_TO_LN = False

    __slots__ = (
        "error_level",
        "error_message_context",
        "max_errors",
        "sql",
        "errors",
        "_tokens",
        "_index",
        "_curr",
        "_next",
        "_prev",
        "_prev_comments",
    )

    # Autofilled
    INDEX_OFFSET: int = 0
    UNNEST_COLUMN_ONLY: bool = False
    ALIAS_POST_TABLESAMPLE: bool = False
    STRICT_STRING_CONCAT = False
    NULL_ORDERING: str = "nulls_are_small"
    SHOW_TRIE: t.Dict = {}
    SET_TRIE: t.Dict = {}
    FORMAT_MAPPING: t.Dict[str, str] = {}
    FORMAT_TRIE: t.Dict = {}
    TIME_MAPPING: t.Dict[str, str] = {}
    TIME_TRIE: t.Dict = {}

    def __init__(
        self,
        error_level: t.Optional[ErrorLevel] = None,
        error_message_context: int = 100,
        max_errors: int = 3,
    ):
        self.error_level = error_level or ErrorLevel.IMMEDIATE
        self.error_message_context = error_message_context
        self.max_errors = max_errors
        self.reset()

    def reset(self):
        self.sql = ""
        self.errors = []
        self._tokens = []
        self._index = 0
        self._curr = None
        self._next = None
        self._prev = None
        self._prev_comments = None

    def parse(
        self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens and returns a list of syntax trees, one tree
        per parsed SQL statement.

        Args:
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The list of the produced syntax trees.
        """
        return self._parse(
            parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
        )
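    # Illustrative usage sketch (not part of the original source; assumes the
    # Tokenizer and Parser re-exported at the sqlglot package root):
    #
    #     import sqlglot
    #     tokens = sqlglot.Tokenizer().tokenize("SELECT a FROM b")
    #     tree = sqlglot.Parser().parse(tokens, sql="SELECT a FROM b")[0]
    #
    # parse() returns one syntax tree per semicolon-separated statement.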
    def parse_into(
        self,
        expression_types: exp.IntoType,
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens into a given Expression type. If a collection of Expression
        types is given instead, this method will try to parse the token list into each one
        of them, stopping at the first for which the parsing succeeds.

        Args:
            expression_types: The expression type(s) to try and parse the token list into.
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The target Expression.
        """
        errors = []
        for expression_type in ensure_list(expression_types):
            parser = self.EXPRESSION_PARSERS.get(expression_type)
            if not parser:
                raise TypeError(f"No parser registered for {expression_type}")

            try:
                return self._parse(parser, raw_tokens, sql)
            except ParseError as e:
                e.errors[0]["into_expression"] = expression_type
                errors.append(e)

        raise ParseError(
            f"Failed to parse '{sql or raw_tokens}' into {expression_types}",
            errors=merge_errors(errors),
        ) from errors[-1]

    def _parse(
        self,
        parse_method: t.Callable[[Parser], t.Optional[exp.Expression]],
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        self.reset()
        self.sql = sql or ""

        total = len(raw_tokens)
        chunks: t.List[t.List[Token]] = [[]]

        for i, token in enumerate(raw_tokens):
            if token.token_type == TokenType.SEMICOLON:
                if i < total - 1:
                    chunks.append([])
            else:
                chunks[-1].append(token)

        expressions = []

        for tokens in chunks:
            self._index = -1
            self._tokens = tokens
            self._advance()

            expressions.append(parse_method(self))

            if self._index < len(self._tokens):
                self.raise_error("Invalid expression / Unexpected token")

            self.check_errors()

        return expressions

    def check_errors(self) -> None:
        """Logs or raises any found errors, depending on the chosen error level setting."""
        if self.error_level == ErrorLevel.WARN:
            for error in self.errors:
                logger.error(str(error))
        elif self.error_level == ErrorLevel.RAISE and self.errors:
            raise ParseError(
                concat_messages(self.errors, self.max_errors),
                errors=merge_errors(self.errors),
            )

    def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
        """
        Appends an error to the list of recorded errors or raises it, depending on the chosen
        error level setting.
        """
        token = token or self._curr or self._prev or Token.string("")
        start = token.start
        end = token.end + 1
        start_context = self.sql[max(start - self.error_message_context, 0) : start]
        highlight = self.sql[start:end]
        end_context = self.sql[end : end + self.error_message_context]

        error = ParseError.new(
            f"{message}. Line {token.line}, Col: {token.col}.\n"
            f"  {start_context}\033[4m{highlight}\033[0m{end_context}",
            description=message,
            line=token.line,
            col=token.col,
            start_context=start_context,
            highlight=highlight,
            end_context=end_context,
        )

        if self.error_level == ErrorLevel.IMMEDIATE:
            raise error

        self.errors.append(error)
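    # Illustrative usage sketch (not part of the original source): parse_into
    # is typically reached through the assumed top-level helper
    #
    #     sqlglot.parse_one("db.tbl", into=exp.Table)
    #
    # which tries each candidate expression type in turn and re-raises a
    # merged ParseError if none of them matches.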
1005 """ 1006 instance = exp_class(**kwargs) 1007 instance.add_comments(comments) if comments else self._add_comments(instance) 1008 return self.validate_expression(instance) 1009 1010 def _add_comments(self, expression: t.Optional[exp.Expression]) -> None: 1011 if expression and self._prev_comments: 1012 expression.add_comments(self._prev_comments) 1013 self._prev_comments = None 1014 1015 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1016 """ 1017 Validates an Expression, making sure that all its mandatory arguments are set. 1018 1019 Args: 1020 expression: The expression to validate. 1021 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1022 1023 Returns: 1024 The validated expression. 1025 """ 1026 if self.error_level != ErrorLevel.IGNORE: 1027 for error_message in expression.error_messages(args): 1028 self.raise_error(error_message) 1029 1030 return expression 1031 1032 def _find_sql(self, start: Token, end: Token) -> str: 1033 return self.sql[start.start : end.end + 1] 1034 1035 def _advance(self, times: int = 1) -> None: 1036 self._index += times 1037 self._curr = seq_get(self._tokens, self._index) 1038 self._next = seq_get(self._tokens, self._index + 1) 1039 1040 if self._index > 0: 1041 self._prev = self._tokens[self._index - 1] 1042 self._prev_comments = self._prev.comments 1043 else: 1044 self._prev = None 1045 self._prev_comments = None 1046 1047 def _retreat(self, index: int) -> None: 1048 if index != self._index: 1049 self._advance(index - self._index) 1050 1051 def _parse_command(self) -> exp.Command: 1052 return self.expression(exp.Command, this=self._prev.text, expression=self._parse_string()) 1053 1054 def _parse_comment(self, allow_exists: bool = True) -> exp.Expression: 1055 start = self._prev 1056 exists = self._parse_exists() if allow_exists else None 1057 1058 self._match(TokenType.ON) 1059 1060 kind = self._match_set(self.CREATABLES) and self._prev 1061 if not kind: 1062 return self._parse_as_command(start) 1063 1064 if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1065 this = self._parse_user_defined_function(kind=kind.token_type) 1066 elif kind.token_type == TokenType.TABLE: 1067 this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS) 1068 elif kind.token_type == TokenType.COLUMN: 1069 this = self._parse_column() 1070 else: 1071 this = self._parse_id_var() 1072 1073 self._match(TokenType.IS) 1074 1075 return self.expression( 1076 exp.Comment, this=this, kind=kind.text, expression=self._parse_string(), exists=exists 1077 ) 1078 1079 def _parse_to_table( 1080 self, 1081 ) -> exp.ToTableProperty: 1082 table = self._parse_table_parts(schema=True) 1083 return self.expression(exp.ToTableProperty, this=table) 1084 1085 # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl 1086 def _parse_ttl(self) -> exp.Expression: 1087 def _parse_ttl_action() -> t.Optional[exp.Expression]: 1088 this = self._parse_bitwise() 1089 1090 if self._match_text_seq("DELETE"): 1091 return self.expression(exp.MergeTreeTTLAction, this=this, delete=True) 1092 if self._match_text_seq("RECOMPRESS"): 1093 return self.expression( 1094 exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise() 1095 ) 1096 if self._match_text_seq("TO", "DISK"): 1097 return self.expression( 1098 exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string() 1099 ) 1100 if self._match_text_seq("TO", "VOLUME"): 1101 return self.expression( 1102 
    # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl
    def _parse_ttl(self) -> exp.Expression:
        def _parse_ttl_action() -> t.Optional[exp.Expression]:
            this = self._parse_bitwise()

            if self._match_text_seq("DELETE"):
                return self.expression(exp.MergeTreeTTLAction, this=this, delete=True)
            if self._match_text_seq("RECOMPRESS"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise()
                )
            if self._match_text_seq("TO", "DISK"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string()
                )
            if self._match_text_seq("TO", "VOLUME"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string()
                )

            return this

        expressions = self._parse_csv(_parse_ttl_action)
        where = self._parse_where()
        group = self._parse_group()

        aggregates = None
        if group and self._match(TokenType.SET):
            aggregates = self._parse_csv(self._parse_set_item)

        return self.expression(
            exp.MergeTreeTTL,
            expressions=expressions,
            where=where,
            group=group,
            aggregates=aggregates,
        )

    def _parse_statement(self) -> t.Optional[exp.Expression]:
        if self._curr is None:
            return None

        if self._match_set(self.STATEMENT_PARSERS):
            return self.STATEMENT_PARSERS[self._prev.token_type](self)

        if self._match_set(Tokenizer.COMMANDS):
            return self._parse_command()

        expression = self._parse_expression()
        expression = self._parse_set_operations(expression) if expression else self._parse_select()
        return self._parse_query_modifiers(expression)

    def _parse_drop(self) -> exp.Drop | exp.Command:
        start = self._prev
        temporary = self._match(TokenType.TEMPORARY)
        materialized = self._match_text_seq("MATERIALIZED")

        kind = self._match_set(self.CREATABLES) and self._prev.text
        if not kind:
            return self._parse_as_command(start)

        return self.expression(
            exp.Drop,
            comments=start.comments,
            exists=self._parse_exists(),
            this=self._parse_table(schema=True),
            kind=kind,
            temporary=temporary,
            materialized=materialized,
            cascade=self._match_text_seq("CASCADE"),
            constraints=self._match_text_seq("CONSTRAINTS"),
            purge=self._match_text_seq("PURGE"),
        )

    def _parse_exists(self, not_: bool = False) -> t.Optional[bool]:
        return (
            self._match(TokenType.IF)
            and (not not_ or self._match(TokenType.NOT))
            and self._match(TokenType.EXISTS)
        )
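    # Illustrative note (not part of the original source): _parse_create below
    # covers everything from CREATE TABLE to CREATE OR REPLACE FUNCTION; an
    # assumed "CREATE OR REPLACE VIEW v AS SELECT 1" produces
    # exp.Create(kind="VIEW", replace=True, expression=<select>), and the
    # CLONE branch handles Snowflake-style zero-copy clones via exp.Clone.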
self._match_text_seq("RETURN") 1212 expression = self._parse_statement() 1213 1214 if return_: 1215 expression = self.expression(exp.Return, this=expression) 1216 elif create_token.token_type == TokenType.INDEX: 1217 this = self._parse_index(index=self._parse_id_var()) 1218 elif create_token.token_type in self.DB_CREATABLES: 1219 table_parts = self._parse_table_parts(schema=True) 1220 1221 # exp.Properties.Location.POST_NAME 1222 self._match(TokenType.COMMA) 1223 extend_props(self._parse_properties(before=True)) 1224 1225 this = self._parse_schema(this=table_parts) 1226 1227 # exp.Properties.Location.POST_SCHEMA and POST_WITH 1228 extend_props(self._parse_properties()) 1229 1230 self._match(TokenType.ALIAS) 1231 if not self._match_set(self.DDL_SELECT_TOKENS, advance=False): 1232 # exp.Properties.Location.POST_ALIAS 1233 extend_props(self._parse_properties()) 1234 1235 expression = self._parse_ddl_select() 1236 1237 if create_token.token_type == TokenType.TABLE: 1238 # exp.Properties.Location.POST_EXPRESSION 1239 extend_props(self._parse_properties()) 1240 1241 indexes = [] 1242 while True: 1243 index = self._parse_index() 1244 1245 # exp.Properties.Location.POST_INDEX 1246 extend_props(self._parse_properties()) 1247 1248 if not index: 1249 break 1250 else: 1251 self._match(TokenType.COMMA) 1252 indexes.append(index) 1253 elif create_token.token_type == TokenType.VIEW: 1254 if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"): 1255 no_schema_binding = True 1256 1257 if self._match_text_seq("CLONE"): 1258 clone = self._parse_table(schema=True) 1259 when = self._match_texts({"AT", "BEFORE"}) and self._prev.text.upper() 1260 clone_kind = ( 1261 self._match(TokenType.L_PAREN) 1262 and self._match_texts(self.CLONE_KINDS) 1263 and self._prev.text.upper() 1264 ) 1265 clone_expression = self._match(TokenType.FARROW) and self._parse_bitwise() 1266 self._match(TokenType.R_PAREN) 1267 clone = self.expression( 1268 exp.Clone, this=clone, when=when, kind=clone_kind, expression=clone_expression 1269 ) 1270 1271 return self.expression( 1272 exp.Create, 1273 this=this, 1274 kind=create_token.text, 1275 replace=replace, 1276 unique=unique, 1277 expression=expression, 1278 exists=exists, 1279 properties=properties, 1280 indexes=indexes, 1281 no_schema_binding=no_schema_binding, 1282 begin=begin, 1283 clone=clone, 1284 ) 1285 1286 def _parse_property_before(self) -> t.Optional[exp.Expression]: 1287 # only used for teradata currently 1288 self._match(TokenType.COMMA) 1289 1290 kwargs = { 1291 "no": self._match_text_seq("NO"), 1292 "dual": self._match_text_seq("DUAL"), 1293 "before": self._match_text_seq("BEFORE"), 1294 "default": self._match_text_seq("DEFAULT"), 1295 "local": (self._match_text_seq("LOCAL") and "LOCAL") 1296 or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"), 1297 "after": self._match_text_seq("AFTER"), 1298 "minimum": self._match_texts(("MIN", "MINIMUM")), 1299 "maximum": self._match_texts(("MAX", "MAXIMUM")), 1300 } 1301 1302 if self._match_texts(self.PROPERTY_PARSERS): 1303 parser = self.PROPERTY_PARSERS[self._prev.text.upper()] 1304 try: 1305 return parser(self, **{k: v for k, v in kwargs.items() if v}) 1306 except TypeError: 1307 self.raise_error(f"Cannot parse property '{self._prev.text}'") 1308 1309 return None 1310 1311 def _parse_property(self) -> t.Optional[exp.Expression]: 1312 if self._match_texts(self.PROPERTY_PARSERS): 1313 return self.PROPERTY_PARSERS[self._prev.text.upper()](self) 1314 1315 if self._match_pair(TokenType.DEFAULT, TokenType.CHARACTER_SET): 1316 return 
    def _parse_property(self) -> t.Optional[exp.Expression]:
        if self._match_texts(self.PROPERTY_PARSERS):
            return self.PROPERTY_PARSERS[self._prev.text.upper()](self)

        if self._match_pair(TokenType.DEFAULT, TokenType.CHARACTER_SET):
            return self._parse_character_set(default=True)

        if self._match_text_seq("COMPOUND", "SORTKEY"):
            return self._parse_sortkey(compound=True)

        if self._match_text_seq("SQL", "SECURITY"):
            return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER"))

        assignment = self._match_pair(
            TokenType.VAR, TokenType.EQ, advance=False
        ) or self._match_pair(TokenType.STRING, TokenType.EQ, advance=False)

        if assignment:
            key = self._parse_var_or_string()
            self._match(TokenType.EQ)
            return self.expression(exp.Property, this=key, value=self._parse_column())

        return None

    def _parse_stored(self) -> exp.FileFormatProperty:
        self._match(TokenType.ALIAS)

        input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None
        output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None

        return self.expression(
            exp.FileFormatProperty,
            this=self.expression(
                exp.InputOutputFormat, input_format=input_format, output_format=output_format
            )
            if input_format or output_format
            else self._parse_var_or_string() or self._parse_number() or self._parse_id_var(),
        )

    def _parse_property_assignment(self, exp_class: t.Type[E]) -> E:
        self._match(TokenType.EQ)
        self._match(TokenType.ALIAS)
        return self.expression(exp_class, this=self._parse_field())

    def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]:
        properties = []
        while True:
            if before:
                prop = self._parse_property_before()
            else:
                prop = self._parse_property()

            if not prop:
                break
            for p in ensure_list(prop):
                properties.append(p)

        if properties:
            return self.expression(exp.Properties, expressions=properties)

        return None

    def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty:
        return self.expression(
            exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION")
        )

    def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty:
        if self._index >= 2:
            pre_volatile_token = self._tokens[self._index - 2]
        else:
            pre_volatile_token = None

        if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS:
            return exp.VolatileProperty()

        return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE"))

    def _parse_with_property(
        self,
    ) -> t.Optional[exp.Expression] | t.List[t.Optional[exp.Expression]]:
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_property)

        if self._match_text_seq("JOURNAL"):
            return self._parse_withjournaltable()

        if self._match_text_seq("DATA"):
            return self._parse_withdata(no=False)
        elif self._match_text_seq("NO", "DATA"):
            return self._parse_withdata(no=True)

        if not self._next:
            return None

        return self._parse_withisolatedloading()
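    # Illustrative note (not part of the original source): DEFINER appears in
    # MySQL DDL such as an assumed "CREATE DEFINER=admin@localhost VIEW v AS
    # ...", which the next method turns into
    # exp.DefinerProperty(this="admin@localhost").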
    # https://dev.mysql.com/doc/refman/8.0/en/create-view.html
    def _parse_definer(self) -> t.Optional[exp.DefinerProperty]:
        self._match(TokenType.EQ)

        user = self._parse_id_var()
        self._match(TokenType.PARAMETER)
        host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text)

        if not user or not host:
            return None

        return exp.DefinerProperty(this=f"{user}@{host}")

    def _parse_withjournaltable(self) -> exp.WithJournalTableProperty:
        self._match(TokenType.TABLE)
        self._match(TokenType.EQ)
        return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts())

    def _parse_log(self, no: bool = False) -> exp.LogProperty:
        return self.expression(exp.LogProperty, no=no)

    def _parse_journal(self, **kwargs) -> exp.JournalProperty:
        return self.expression(exp.JournalProperty, **kwargs)

    def _parse_checksum(self) -> exp.ChecksumProperty:
        self._match(TokenType.EQ)

        on = None
        if self._match(TokenType.ON):
            on = True
        elif self._match_text_seq("OFF"):
            on = False

        return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT))

    def _parse_cluster(self) -> exp.Cluster:
        return self.expression(exp.Cluster, expressions=self._parse_csv(self._parse_ordered))

    def _parse_clustered_by(self) -> exp.ClusteredByProperty:
        self._match_text_seq("BY")

        self._match_l_paren()
        expressions = self._parse_csv(self._parse_column)
        self._match_r_paren()

        if self._match_text_seq("SORTED", "BY"):
            self._match_l_paren()
            sorted_by = self._parse_csv(self._parse_ordered)
            self._match_r_paren()
        else:
            sorted_by = None

        self._match(TokenType.INTO)
        buckets = self._parse_number()
        self._match_text_seq("BUCKETS")

        return self.expression(
            exp.ClusteredByProperty,
            expressions=expressions,
            sorted_by=sorted_by,
            buckets=buckets,
        )

    def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]:
        if not self._match_text_seq("GRANTS"):
            self._retreat(self._index - 1)
            return None

        return self.expression(exp.CopyGrantsProperty)

    def _parse_freespace(self) -> exp.FreespaceProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT)
        )

    def _parse_mergeblockratio(
        self, no: bool = False, default: bool = False
    ) -> exp.MergeBlockRatioProperty:
        if self._match(TokenType.EQ):
            return self.expression(
                exp.MergeBlockRatioProperty,
                this=self._parse_number(),
                percent=self._match(TokenType.PERCENT),
            )

        return self.expression(exp.MergeBlockRatioProperty, no=no, default=default)

    def _parse_datablocksize(
        self,
        default: t.Optional[bool] = None,
        minimum: t.Optional[bool] = None,
        maximum: t.Optional[bool] = None,
    ) -> exp.DataBlocksizeProperty:
        self._match(TokenType.EQ)
        size = self._parse_number()

        units = None
        if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")):
            units = self._prev.text

        return self.expression(
            exp.DataBlocksizeProperty,
            size=size,
            units=units,
            default=default,
            minimum=minimum,
            maximum=maximum,
        )
    def _parse_blockcompression(self) -> exp.BlockCompressionProperty:
        self._match(TokenType.EQ)
        always = self._match_text_seq("ALWAYS")
        manual = self._match_text_seq("MANUAL")
        never = self._match_text_seq("NEVER")
        default = self._match_text_seq("DEFAULT")

        autotemp = None
        if self._match_text_seq("AUTOTEMP"):
            autotemp = self._parse_schema()

        return self.expression(
            exp.BlockCompressionProperty,
            always=always,
            manual=manual,
            never=never,
            default=default,
            autotemp=autotemp,
        )

    def _parse_withisolatedloading(self) -> exp.IsolatedLoadingProperty:
        no = self._match_text_seq("NO")
        concurrent = self._match_text_seq("CONCURRENT")
        self._match_text_seq("ISOLATED", "LOADING")
        for_all = self._match_text_seq("FOR", "ALL")
        for_insert = self._match_text_seq("FOR", "INSERT")
        for_none = self._match_text_seq("FOR", "NONE")
        return self.expression(
            exp.IsolatedLoadingProperty,
            no=no,
            concurrent=concurrent,
            for_all=for_all,
            for_insert=for_insert,
            for_none=for_none,
        )

    def _parse_locking(self) -> exp.LockingProperty:
        if self._match(TokenType.TABLE):
            kind = "TABLE"
        elif self._match(TokenType.VIEW):
            kind = "VIEW"
        elif self._match(TokenType.ROW):
            kind = "ROW"
        elif self._match_text_seq("DATABASE"):
            kind = "DATABASE"
        else:
            kind = None

        if kind in ("DATABASE", "TABLE", "VIEW"):
            this = self._parse_table_parts()
        else:
            this = None

        if self._match(TokenType.FOR):
            for_or_in = "FOR"
        elif self._match(TokenType.IN):
            for_or_in = "IN"
        else:
            for_or_in = None

        if self._match_text_seq("ACCESS"):
            lock_type = "ACCESS"
        elif self._match_texts(("EXCL", "EXCLUSIVE")):
            lock_type = "EXCLUSIVE"
        elif self._match_text_seq("SHARE"):
            lock_type = "SHARE"
        elif self._match_text_seq("READ"):
            lock_type = "READ"
        elif self._match_text_seq("WRITE"):
            lock_type = "WRITE"
        elif self._match_text_seq("CHECKSUM"):
            lock_type = "CHECKSUM"
        else:
            lock_type = None

        override = self._match_text_seq("OVERRIDE")

        return self.expression(
            exp.LockingProperty,
            this=this,
            kind=kind,
            for_or_in=for_or_in,
            lock_type=lock_type,
            override=override,
        )

    def _parse_partition_by(self) -> t.List[t.Optional[exp.Expression]]:
        if self._match(TokenType.PARTITION_BY):
            return self._parse_csv(self._parse_conjunction)
        return []

    def _parse_partitioned_by(self) -> exp.PartitionedByProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.PartitionedByProperty,
            this=self._parse_schema() or self._parse_bracket(self._parse_field()),
        )

    def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty:
        if self._match_text_seq("AND", "STATISTICS"):
            statistics = True
        elif self._match_text_seq("AND", "NO", "STATISTICS"):
            statistics = False
        else:
            statistics = None

        return self.expression(exp.WithDataProperty, no=no, statistics=statistics)

    def _parse_no_property(self) -> t.Optional[exp.NoPrimaryIndexProperty]:
        if self._match_text_seq("PRIMARY", "INDEX"):
            return exp.NoPrimaryIndexProperty()
        return None

    def _parse_on_property(self) -> t.Optional[exp.Expression]:
        if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"):
            return exp.OnCommitProperty()
        elif self._match_text_seq("COMMIT", "DELETE", "ROWS"):
            return exp.OnCommitProperty(delete=True)
        return None

    def _parse_distkey(self) -> exp.DistKeyProperty:
        return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var))
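    # Illustrative note (not part of the original source): _parse_create_like
    # below handles Postgres-style "CREATE TABLE t (LIKE s INCLUDING
    # DEFAULTS)", recording each INCLUDING/EXCLUDING option as an
    # exp.Property on the resulting exp.LikeProperty.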
    def _parse_create_like(self) -> t.Optional[exp.LikeProperty]:
        table = self._parse_table(schema=True)

        options = []
        while self._match_texts(("INCLUDING", "EXCLUDING")):
            this = self._prev.text.upper()

            id_var = self._parse_id_var()
            if not id_var:
                return None

            options.append(
                self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper()))
            )

        return self.expression(exp.LikeProperty, this=table, expressions=options)

    def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty:
        return self.expression(
            exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound
        )

    def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default
        )

    def _parse_returns(self) -> exp.ReturnsProperty:
        value: t.Optional[exp.Expression]
        is_table = self._match(TokenType.TABLE)

        if is_table:
            if self._match(TokenType.LT):
                value = self.expression(
                    exp.Schema,
                    this="TABLE",
                    expressions=self._parse_csv(self._parse_struct_types),
                )
                if not self._match(TokenType.GT):
                    self.raise_error("Expecting >")
            else:
                value = self._parse_schema(exp.var("TABLE"))
        else:
            value = self._parse_types()

        return self.expression(exp.ReturnsProperty, this=value, is_table=is_table)

    def _parse_describe(self) -> exp.Describe:
        kind = self._match_set(self.CREATABLES) and self._prev.text
        this = self._parse_table()
        return self.expression(exp.Describe, this=this, kind=kind)

    def _parse_insert(self) -> exp.Insert:
        overwrite = self._match(TokenType.OVERWRITE)
        ignore = self._match(TokenType.IGNORE)
        local = self._match_text_seq("LOCAL")
        alternative = None

        if self._match_text_seq("DIRECTORY"):
            this: t.Optional[exp.Expression] = self.expression(
                exp.Directory,
                this=self._parse_var_or_string(),
                local=local,
                row_format=self._parse_row_format(match_row=True),
            )
        else:
            if self._match(TokenType.OR):
                alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text

            self._match(TokenType.INTO)
            self._match(TokenType.TABLE)
            this = self._parse_table(schema=True)

        returning = self._parse_returning()

        return self.expression(
            exp.Insert,
            this=this,
            exists=self._parse_exists(),
            partition=self._parse_partition(),
            where=self._match_pair(TokenType.REPLACE, TokenType.WHERE)
            and self._parse_conjunction(),
            expression=self._parse_ddl_select(),
            conflict=self._parse_on_conflict(),
            returning=returning or self._parse_returning(),
            overwrite=overwrite,
            alternative=alternative,
            ignore=ignore,
        )
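    # Illustrative note (not part of the original source): _parse_on_conflict
    # below accepts both Postgres "ON CONFLICT ... DO NOTHING/UPDATE SET ..."
    # and MySQL "ON DUPLICATE KEY UPDATE ...", so an assumed statement like
    # "INSERT INTO t (x) VALUES (1) ON CONFLICT DO NOTHING" attaches an
    # exp.OnConflict with nothing=True to the exp.Insert.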
exp.OnConflict, 1760 duplicate=duplicate, 1761 expressions=expressions, 1762 nothing=nothing, 1763 key=key, 1764 constraint=constraint, 1765 ) 1766 1767 def _parse_returning(self) -> t.Optional[exp.Returning]: 1768 if not self._match(TokenType.RETURNING): 1769 return None 1770 return self.expression( 1771 exp.Returning, 1772 expressions=self._parse_csv(self._parse_expression), 1773 into=self._match(TokenType.INTO) and self._parse_table_part(), 1774 ) 1775 1776 def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 1777 if not self._match(TokenType.FORMAT): 1778 return None 1779 return self._parse_row_format() 1780 1781 def _parse_row_format( 1782 self, match_row: bool = False 1783 ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 1784 if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT): 1785 return None 1786 1787 if self._match_text_seq("SERDE"): 1788 this = self._parse_string() 1789 1790 serde_properties = None 1791 if self._match(TokenType.SERDE_PROPERTIES): 1792 serde_properties = self.expression( 1793 exp.SerdeProperties, expressions=self._parse_wrapped_csv(self._parse_property) 1794 ) 1795 1796 return self.expression( 1797 exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties 1798 ) 1799 1800 self._match_text_seq("DELIMITED") 1801 1802 kwargs = {} 1803 1804 if self._match_text_seq("FIELDS", "TERMINATED", "BY"): 1805 kwargs["fields"] = self._parse_string() 1806 if self._match_text_seq("ESCAPED", "BY"): 1807 kwargs["escaped"] = self._parse_string() 1808 if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"): 1809 kwargs["collection_items"] = self._parse_string() 1810 if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"): 1811 kwargs["map_keys"] = self._parse_string() 1812 if self._match_text_seq("LINES", "TERMINATED", "BY"): 1813 kwargs["lines"] = self._parse_string() 1814 if self._match_text_seq("NULL", "DEFINED", "AS"): 1815 kwargs["null"] = self._parse_string() 1816 1817 return self.expression(exp.RowFormatDelimitedProperty, **kwargs) # type: ignore 1818 1819 def _parse_load(self) -> exp.LoadData | exp.Command: 1820 if self._match_text_seq("DATA"): 1821 local = self._match_text_seq("LOCAL") 1822 self._match_text_seq("INPATH") 1823 inpath = self._parse_string() 1824 overwrite = self._match(TokenType.OVERWRITE) 1825 self._match_pair(TokenType.INTO, TokenType.TABLE) 1826 1827 return self.expression( 1828 exp.LoadData, 1829 this=self._parse_table(schema=True), 1830 local=local, 1831 overwrite=overwrite, 1832 inpath=inpath, 1833 partition=self._parse_partition(), 1834 input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(), 1835 serde=self._match_text_seq("SERDE") and self._parse_string(), 1836 ) 1837 return self._parse_as_command(self._prev) 1838 1839 def _parse_delete(self) -> exp.Delete: 1840 # This handles MySQL's "Multiple-Table Syntax" 1841 # https://dev.mysql.com/doc/refman/8.0/en/delete.html 1842 tables = None 1843 if not self._match(TokenType.FROM, advance=False): 1844 tables = self._parse_csv(self._parse_table) or None 1845 1846 returning = self._parse_returning() 1847 1848 return self.expression( 1849 exp.Delete, 1850 tables=tables, 1851 this=self._match(TokenType.FROM) and self._parse_table(joins=True), 1852 using=self._match(TokenType.USING) and self._parse_table(joins=True), 1853 where=self._parse_where(), 1854 returning=returning or self._parse_returning(), 1855 limit=self._parse_limit(), 1856 ) 1857 1858 def _parse_update(self) -> 
exp.Update: 1859 this = self._parse_table(alias_tokens=self.UPDATE_ALIAS_TOKENS) 1860 expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality) 1861 returning = self._parse_returning() 1862 return self.expression( 1863 exp.Update, 1864 **{ # type: ignore 1865 "this": this, 1866 "expressions": expressions, 1867 "from": self._parse_from(joins=True), 1868 "where": self._parse_where(), 1869 "returning": returning or self._parse_returning(), 1870 "limit": self._parse_limit(), 1871 }, 1872 ) 1873 1874 def _parse_uncache(self) -> exp.Uncache: 1875 if not self._match(TokenType.TABLE): 1876 self.raise_error("Expecting TABLE after UNCACHE") 1877 1878 return self.expression( 1879 exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True) 1880 ) 1881 1882 def _parse_cache(self) -> exp.Cache: 1883 lazy = self._match_text_seq("LAZY") 1884 self._match(TokenType.TABLE) 1885 table = self._parse_table(schema=True) 1886 1887 options = [] 1888 if self._match_text_seq("OPTIONS"): 1889 self._match_l_paren() 1890 k = self._parse_string() 1891 self._match(TokenType.EQ) 1892 v = self._parse_string() 1893 options = [k, v] 1894 self._match_r_paren() 1895 1896 self._match(TokenType.ALIAS) 1897 return self.expression( 1898 exp.Cache, 1899 this=table, 1900 lazy=lazy, 1901 options=options, 1902 expression=self._parse_select(nested=True), 1903 ) 1904 1905 def _parse_partition(self) -> t.Optional[exp.Partition]: 1906 if not self._match(TokenType.PARTITION): 1907 return None 1908 1909 return self.expression( 1910 exp.Partition, expressions=self._parse_wrapped_csv(self._parse_conjunction) 1911 ) 1912 1913 def _parse_value(self) -> exp.Tuple: 1914 if self._match(TokenType.L_PAREN): 1915 expressions = self._parse_csv(self._parse_conjunction) 1916 self._match_r_paren() 1917 return self.expression(exp.Tuple, expressions=expressions) 1918 1919 # In presto we can have VALUES 1, 2 which results in 1 column & 2 rows. 
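# For example, "VALUES 1, 2" (no parentheses) is two single-column rows,
# while "VALUES (1, 2)" is one two-column row; the bare-expression fallback
# below wraps each row in a one-element exp.Tuple so both shapes are
# uniform downstream.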
1920 # https://prestodb.io/docs/current/sql/values.html 1921 return self.expression(exp.Tuple, expressions=[self._parse_conjunction()]) 1922 1923 def _parse_select( 1924 self, nested: bool = False, table: bool = False, parse_subquery_alias: bool = True 1925 ) -> t.Optional[exp.Expression]: 1926 cte = self._parse_with() 1927 if cte: 1928 this = self._parse_statement() 1929 1930 if not this: 1931 self.raise_error("Failed to parse any statement following CTE") 1932 return cte 1933 1934 if "with" in this.arg_types: 1935 this.set("with", cte) 1936 else: 1937 self.raise_error(f"{this.key} does not support CTE") 1938 this = cte 1939 elif self._match(TokenType.SELECT): 1940 comments = self._prev_comments 1941 1942 hint = self._parse_hint() 1943 all_ = self._match(TokenType.ALL) 1944 distinct = self._match(TokenType.DISTINCT) 1945 1946 kind = ( 1947 self._match(TokenType.ALIAS) 1948 and self._match_texts(("STRUCT", "VALUE")) 1949 and self._prev.text 1950 ) 1951 1952 if distinct: 1953 distinct = self.expression( 1954 exp.Distinct, 1955 on=self._parse_value() if self._match(TokenType.ON) else None, 1956 ) 1957 1958 if all_ and distinct: 1959 self.raise_error("Cannot specify both ALL and DISTINCT after SELECT") 1960 1961 limit = self._parse_limit(top=True) 1962 expressions = self._parse_expressions() 1963 1964 this = self.expression( 1965 exp.Select, 1966 kind=kind, 1967 hint=hint, 1968 distinct=distinct, 1969 expressions=expressions, 1970 limit=limit, 1971 ) 1972 this.comments = comments 1973 1974 into = self._parse_into() 1975 if into: 1976 this.set("into", into) 1977 1978 from_ = self._parse_from() 1979 if from_: 1980 this.set("from", from_) 1981 1982 this = self._parse_query_modifiers(this) 1983 elif (table or nested) and self._match(TokenType.L_PAREN): 1984 if self._match(TokenType.PIVOT): 1985 this = self._parse_simplified_pivot() 1986 elif self._match(TokenType.FROM): 1987 this = exp.select("*").from_( 1988 t.cast(exp.From, self._parse_from(skip_from_token=True)) 1989 ) 1990 else: 1991 this = self._parse_table() if table else self._parse_select(nested=True) 1992 this = self._parse_set_operations(self._parse_query_modifiers(this)) 1993 1994 self._match_r_paren() 1995 1996 # We return early here so that the UNION isn't attached to the subquery by the 1997 # following call to _parse_set_operations, but instead becomes the parent node 1998 return self._parse_subquery(this, parse_alias=parse_subquery_alias) 1999 elif self._match(TokenType.VALUES): 2000 this = self.expression( 2001 exp.Values, 2002 expressions=self._parse_csv(self._parse_value), 2003 alias=self._parse_table_alias(), 2004 ) 2005 else: 2006 this = None 2007 2008 return self._parse_set_operations(this) 2009 2010 def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]: 2011 if not skip_with_token and not self._match(TokenType.WITH): 2012 return None 2013 2014 comments = self._prev_comments 2015 recursive = self._match(TokenType.RECURSIVE) 2016 2017 expressions = [] 2018 while True: 2019 expressions.append(self._parse_cte()) 2020 2021 if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH): 2022 break 2023 else: 2024 self._match(TokenType.WITH) 2025 2026 return self.expression( 2027 exp.With, comments=comments, expressions=expressions, recursive=recursive 2028 ) 2029 2030 def _parse_cte(self) -> exp.CTE: 2031 alias = self._parse_table_alias() 2032 if not alias or not alias.this: 2033 self.raise_error("Expected CTE to have alias") 2034 2035 self._match(TokenType.ALIAS) 2036 return self.expression( 2037 
exp.CTE, this=self._parse_wrapped(self._parse_statement), alias=alias 2038 ) 2039 2040 def _parse_table_alias( 2041 self, alias_tokens: t.Optional[t.Collection[TokenType]] = None 2042 ) -> t.Optional[exp.TableAlias]: 2043 any_token = self._match(TokenType.ALIAS) 2044 alias = ( 2045 self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 2046 or self._parse_string_as_identifier() 2047 ) 2048 2049 index = self._index 2050 if self._match(TokenType.L_PAREN): 2051 columns = self._parse_csv(self._parse_function_parameter) 2052 self._match_r_paren() if columns else self._retreat(index) 2053 else: 2054 columns = None 2055 2056 if not alias and not columns: 2057 return None 2058 2059 return self.expression(exp.TableAlias, this=alias, columns=columns) 2060 2061 def _parse_subquery( 2062 self, this: t.Optional[exp.Expression], parse_alias: bool = True 2063 ) -> t.Optional[exp.Subquery]: 2064 if not this: 2065 return None 2066 2067 return self.expression( 2068 exp.Subquery, 2069 this=this, 2070 pivots=self._parse_pivots(), 2071 alias=self._parse_table_alias() if parse_alias else None, 2072 ) 2073 2074 def _parse_query_modifiers( 2075 self, this: t.Optional[exp.Expression] 2076 ) -> t.Optional[exp.Expression]: 2077 if isinstance(this, self.MODIFIABLES): 2078 for join in iter(self._parse_join, None): 2079 this.append("joins", join) 2080 for lateral in iter(self._parse_lateral, None): 2081 this.append("laterals", lateral) 2082 2083 while True: 2084 if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False): 2085 parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type] 2086 key, expression = parser(self) 2087 2088 if expression: 2089 this.set(key, expression) 2090 if key == "limit": 2091 offset = expression.args.pop("offset", None) 2092 if offset: 2093 this.set("offset", exp.Offset(expression=offset)) 2094 continue 2095 break 2096 return this 2097 2098 def _parse_hint(self) -> t.Optional[exp.Hint]: 2099 if self._match(TokenType.HINT): 2100 hints = [] 2101 for hint in iter(lambda: self._parse_csv(self._parse_function), []): 2102 hints.extend(hint) 2103 2104 if not self._match_pair(TokenType.STAR, TokenType.SLASH): 2105 self.raise_error("Expected */ after HINT") 2106 2107 return self.expression(exp.Hint, expressions=hints) 2108 2109 return None 2110 2111 def _parse_into(self) -> t.Optional[exp.Into]: 2112 if not self._match(TokenType.INTO): 2113 return None 2114 2115 temp = self._match(TokenType.TEMPORARY) 2116 unlogged = self._match_text_seq("UNLOGGED") 2117 self._match(TokenType.TABLE) 2118 2119 return self.expression( 2120 exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged 2121 ) 2122 2123 def _parse_from( 2124 self, joins: bool = False, skip_from_token: bool = False 2125 ) -> t.Optional[exp.From]: 2126 if not skip_from_token and not self._match(TokenType.FROM): 2127 return None 2128 2129 return self.expression( 2130 exp.From, comments=self._prev_comments, this=self._parse_table(joins=joins) 2131 ) 2132 2133 def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]: 2134 if not self._match(TokenType.MATCH_RECOGNIZE): 2135 return None 2136 2137 self._match_l_paren() 2138 2139 partition = self._parse_partition_by() 2140 order = self._parse_order() 2141 measures = self._parse_expressions() if self._match_text_seq("MEASURES") else None 2142 2143 if self._match_text_seq("ONE", "ROW", "PER", "MATCH"): 2144 rows = exp.var("ONE ROW PER MATCH") 2145 elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"): 2146 text = "ALL ROWS PER 
MATCH" 2147 if self._match_text_seq("SHOW", "EMPTY", "MATCHES"): 2148 text += f" SHOW EMPTY MATCHES" 2149 elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"): 2150 text += f" OMIT EMPTY MATCHES" 2151 elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"): 2152 text += f" WITH UNMATCHED ROWS" 2153 rows = exp.var(text) 2154 else: 2155 rows = None 2156 2157 if self._match_text_seq("AFTER", "MATCH", "SKIP"): 2158 text = "AFTER MATCH SKIP" 2159 if self._match_text_seq("PAST", "LAST", "ROW"): 2160 text += f" PAST LAST ROW" 2161 elif self._match_text_seq("TO", "NEXT", "ROW"): 2162 text += f" TO NEXT ROW" 2163 elif self._match_text_seq("TO", "FIRST"): 2164 text += f" TO FIRST {self._advance_any().text}" # type: ignore 2165 elif self._match_text_seq("TO", "LAST"): 2166 text += f" TO LAST {self._advance_any().text}" # type: ignore 2167 after = exp.var(text) 2168 else: 2169 after = None 2170 2171 if self._match_text_seq("PATTERN"): 2172 self._match_l_paren() 2173 2174 if not self._curr: 2175 self.raise_error("Expecting )", self._curr) 2176 2177 paren = 1 2178 start = self._curr 2179 2180 while self._curr and paren > 0: 2181 if self._curr.token_type == TokenType.L_PAREN: 2182 paren += 1 2183 if self._curr.token_type == TokenType.R_PAREN: 2184 paren -= 1 2185 2186 end = self._prev 2187 self._advance() 2188 2189 if paren > 0: 2190 self.raise_error("Expecting )", self._curr) 2191 2192 pattern = exp.var(self._find_sql(start, end)) 2193 else: 2194 pattern = None 2195 2196 define = ( 2197 self._parse_csv( 2198 lambda: self.expression( 2199 exp.Alias, 2200 alias=self._parse_id_var(any_token=True), 2201 this=self._match(TokenType.ALIAS) and self._parse_conjunction(), 2202 ) 2203 ) 2204 if self._match_text_seq("DEFINE") 2205 else None 2206 ) 2207 2208 self._match_r_paren() 2209 2210 return self.expression( 2211 exp.MatchRecognize, 2212 partition_by=partition, 2213 order=order, 2214 measures=measures, 2215 rows=rows, 2216 after=after, 2217 pattern=pattern, 2218 define=define, 2219 alias=self._parse_table_alias(), 2220 ) 2221 2222 def _parse_lateral(self) -> t.Optional[exp.Lateral]: 2223 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY) 2224 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY) 2225 2226 if outer_apply or cross_apply: 2227 this = self._parse_select(table=True) 2228 view = None 2229 outer = not cross_apply 2230 elif self._match(TokenType.LATERAL): 2231 this = self._parse_select(table=True) 2232 view = self._match(TokenType.VIEW) 2233 outer = self._match(TokenType.OUTER) 2234 else: 2235 return None 2236 2237 if not this: 2238 this = self._parse_function() or self._parse_id_var(any_token=False) 2239 while self._match(TokenType.DOT): 2240 this = exp.Dot( 2241 this=this, 2242 expression=self._parse_function() or self._parse_id_var(any_token=False), 2243 ) 2244 2245 if view: 2246 table = self._parse_id_var(any_token=False) 2247 columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else [] 2248 table_alias: t.Optional[exp.TableAlias] = self.expression( 2249 exp.TableAlias, this=table, columns=columns 2250 ) 2251 elif isinstance(this, exp.Subquery) and this.alias: 2252 # Ensures parity between the Subquery's and the Lateral's "alias" args 2253 table_alias = this.args["alias"].copy() 2254 else: 2255 table_alias = self._parse_table_alias() 2256 2257 return self.expression(exp.Lateral, this=this, view=view, outer=outer, alias=table_alias) 2258 2259 def _parse_join_parts( 2260 self, 2261 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], 
t.Optional[Token]]: 2262 return ( 2263 self._match_set(self.JOIN_METHODS) and self._prev, 2264 self._match_set(self.JOIN_SIDES) and self._prev, 2265 self._match_set(self.JOIN_KINDS) and self._prev, 2266 ) 2267 2268 def _parse_join( 2269 self, skip_join_token: bool = False, parse_bracket: bool = False 2270 ) -> t.Optional[exp.Join]: 2271 if self._match(TokenType.COMMA): 2272 return self.expression(exp.Join, this=self._parse_table()) 2273 2274 index = self._index 2275 method, side, kind = self._parse_join_parts() 2276 hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None 2277 join = self._match(TokenType.JOIN) 2278 2279 if not skip_join_token and not join: 2280 self._retreat(index) 2281 kind = None 2282 method = None 2283 side = None 2284 2285 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False) 2286 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False) 2287 2288 if not skip_join_token and not join and not outer_apply and not cross_apply: 2289 return None 2290 2291 if outer_apply: 2292 side = Token(TokenType.LEFT, "LEFT") 2293 2294 kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)} 2295 2296 if method: 2297 kwargs["method"] = method.text 2298 if side: 2299 kwargs["side"] = side.text 2300 if kind: 2301 kwargs["kind"] = kind.text 2302 if hint: 2303 kwargs["hint"] = hint 2304 2305 if self._match(TokenType.ON): 2306 kwargs["on"] = self._parse_conjunction() 2307 elif self._match(TokenType.USING): 2308 kwargs["using"] = self._parse_wrapped_id_vars() 2309 elif not (kind and kind.token_type == TokenType.CROSS): 2310 index = self._index 2311 joins = self._parse_joins() 2312 2313 if joins and self._match(TokenType.ON): 2314 kwargs["on"] = self._parse_conjunction() 2315 elif joins and self._match(TokenType.USING): 2316 kwargs["using"] = self._parse_wrapped_id_vars() 2317 else: 2318 joins = None 2319 self._retreat(index) 2320 2321 kwargs["this"].set("joins", joins) 2322 2323 return self.expression(exp.Join, **kwargs) 2324 2325 def _parse_index( 2326 self, 2327 index: t.Optional[exp.Expression] = None, 2328 ) -> t.Optional[exp.Index]: 2329 if index: 2330 unique = None 2331 primary = None 2332 amp = None 2333 2334 self._match(TokenType.ON) 2335 self._match(TokenType.TABLE) # hive 2336 table = self._parse_table_parts(schema=True) 2337 else: 2338 unique = self._match(TokenType.UNIQUE) 2339 primary = self._match_text_seq("PRIMARY") 2340 amp = self._match_text_seq("AMP") 2341 2342 if not self._match(TokenType.INDEX): 2343 return None 2344 2345 index = self._parse_id_var() 2346 table = None 2347 2348 using = self._parse_field() if self._match(TokenType.USING) else None 2349 2350 if self._match(TokenType.L_PAREN, advance=False): 2351 columns = self._parse_wrapped_csv(self._parse_ordered) 2352 else: 2353 columns = None 2354 2355 return self.expression( 2356 exp.Index, 2357 this=index, 2358 table=table, 2359 using=using, 2360 columns=columns, 2361 unique=unique, 2362 primary=primary, 2363 amp=amp, 2364 partition_by=self._parse_partition_by(), 2365 ) 2366 2367 def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]: 2368 hints: t.List[exp.Expression] = [] 2369 if self._match_pair(TokenType.WITH, TokenType.L_PAREN): 2370 # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16 2371 hints.append( 2372 self.expression( 2373 exp.WithTableHint, 2374 expressions=self._parse_csv( 2375 lambda: self._parse_function() or self._parse_var(any_token=True) 2376 ), 2377 ) 2378 ) 2379 
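# Illustrative sketch (assuming sqlglot's public parse_one API; tree shape
# approximate): the T-SQL hint list in "SELECT * FROM t WITH (NOLOCK)" is
# collected above into a single exp.WithTableHint before the closing paren
# is consumed:
#   import sqlglot
#   hint = sqlglot.parse_one(
#       "SELECT * FROM t WITH (NOLOCK)", read="tsql"
#   ).find(sqlglot.exp.WithTableHint)
#   # roughly WithTableHint(expressions=[Var(this=NOLOCK)])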
self._match_r_paren() 2380 else: 2381 # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html 2382 while self._match_set(self.TABLE_INDEX_HINT_TOKENS): 2383 hint = exp.IndexTableHint(this=self._prev.text.upper()) 2384 2385 self._match_texts({"INDEX", "KEY"}) 2386 if self._match(TokenType.FOR): 2387 hint.set("target", self._advance_any() and self._prev.text.upper()) 2388 2389 hint.set("expressions", self._parse_wrapped_id_vars()) 2390 hints.append(hint) 2391 2392 return hints or None 2393 2394 def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]: 2395 return ( 2396 (not schema and self._parse_function(optional_parens=False)) 2397 or self._parse_id_var(any_token=False) 2398 or self._parse_string_as_identifier() 2399 or self._parse_placeholder() 2400 ) 2401 2402 def _parse_table_parts(self, schema: bool = False) -> exp.Table: 2403 catalog = None 2404 db = None 2405 table = self._parse_table_part(schema=schema) 2406 2407 while self._match(TokenType.DOT): 2408 if catalog: 2409 # This allows nesting the table in arbitrarily many dot expressions if needed 2410 table = self.expression( 2411 exp.Dot, this=table, expression=self._parse_table_part(schema=schema) 2412 ) 2413 else: 2414 catalog = db 2415 db = table 2416 table = self._parse_table_part(schema=schema) 2417 2418 if not table: 2419 self.raise_error(f"Expected table name but got {self._curr}") 2420 2421 return self.expression( 2422 exp.Table, this=table, db=db, catalog=catalog, pivots=self._parse_pivots() 2423 ) 2424 2425 def _parse_table( 2426 self, 2427 schema: bool = False, 2428 joins: bool = False, 2429 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 2430 parse_bracket: bool = False, 2431 ) -> t.Optional[exp.Expression]: 2432 lateral = self._parse_lateral() 2433 if lateral: 2434 return lateral 2435 2436 unnest = self._parse_unnest() 2437 if unnest: 2438 return unnest 2439 2440 values = self._parse_derived_table_values() 2441 if values: 2442 return values 2443 2444 subquery = self._parse_select(table=True) 2445 if subquery: 2446 if not subquery.args.get("pivots"): 2447 subquery.set("pivots", self._parse_pivots()) 2448 return subquery 2449 2450 bracket = parse_bracket and self._parse_bracket(None) 2451 bracket = self.expression(exp.Table, this=bracket) if bracket else None 2452 this: exp.Expression = bracket or self._parse_table_parts(schema=schema) 2453 2454 if schema: 2455 return self._parse_schema(this=this) 2456 2457 if self.ALIAS_POST_TABLESAMPLE: 2458 table_sample = self._parse_table_sample() 2459 2460 alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 2461 if alias: 2462 this.set("alias", alias) 2463 2464 if not this.args.get("pivots"): 2465 this.set("pivots", self._parse_pivots()) 2466 2467 this.set("hints", self._parse_table_hints()) 2468 2469 if not self.ALIAS_POST_TABLESAMPLE: 2470 table_sample = self._parse_table_sample() 2471 2472 if table_sample: 2473 table_sample.set("this", this) 2474 this = table_sample 2475 2476 if joins: 2477 for join in iter(self._parse_join, None): 2478 this.append("joins", join) 2479 2480 return this 2481 2482 def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]: 2483 if not self._match(TokenType.UNNEST): 2484 return None 2485 2486 expressions = self._parse_wrapped_csv(self._parse_type) 2487 ordinality = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 2488 2489 alias = self._parse_table_alias() if with_alias else None 2490 2491 if alias and self.UNNEST_COLUMN_ONLY: 2492 if alias.args.get("columns"): 
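# In UNNEST_COLUMN_ONLY dialects (e.g. BigQuery) the alias names the single
# unnested column rather than the derived table, so an explicit column list
# such as "UNNEST(x) AS t(c)" is contradictory and rejected here; otherwise
# the table alias is reinterpreted below as the column alias, e.g.
# "SELECT * FROM UNNEST([1, 2]) AS n" aliases the column n.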
2493 self.raise_error("Unexpected extra column alias in unnest.") 2494 2495 alias.set("columns", [alias.this]) 2496 alias.set("this", None) 2497 2498 offset = None 2499 if self._match_pair(TokenType.WITH, TokenType.OFFSET): 2500 self._match(TokenType.ALIAS) 2501 offset = self._parse_id_var() or exp.to_identifier("offset") 2502 2503 return self.expression( 2504 exp.Unnest, expressions=expressions, ordinality=ordinality, alias=alias, offset=offset 2505 ) 2506 2507 def _parse_derived_table_values(self) -> t.Optional[exp.Values]: 2508 is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES) 2509 if not is_derived and not self._match(TokenType.VALUES): 2510 return None 2511 2512 expressions = self._parse_csv(self._parse_value) 2513 alias = self._parse_table_alias() 2514 2515 if is_derived: 2516 self._match_r_paren() 2517 2518 return self.expression( 2519 exp.Values, expressions=expressions, alias=alias or self._parse_table_alias() 2520 ) 2521 2522 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 2523 if not self._match(TokenType.TABLE_SAMPLE) and not ( 2524 as_modifier and self._match_text_seq("USING", "SAMPLE") 2525 ): 2526 return None 2527 2528 bucket_numerator = None 2529 bucket_denominator = None 2530 bucket_field = None 2531 percent = None 2532 rows = None 2533 size = None 2534 seed = None 2535 2536 kind = ( 2537 self._prev.text if self._prev.token_type == TokenType.TABLE_SAMPLE else "USING SAMPLE" 2538 ) 2539 method = self._parse_var(tokens=(TokenType.ROW,)) 2540 2541 self._match(TokenType.L_PAREN) 2542 2543 num = self._parse_number() 2544 2545 if self._match_text_seq("BUCKET"): 2546 bucket_numerator = self._parse_number() 2547 self._match_text_seq("OUT", "OF") 2548 bucket_denominator = self._parse_number() 2549 self._match(TokenType.ON) 2550 bucket_field = self._parse_field() 2551 elif self._match_set((TokenType.PERCENT, TokenType.MOD)): 2552 percent = num 2553 elif self._match(TokenType.ROWS): 2554 rows = num 2555 else: 2556 size = num 2557 2558 self._match(TokenType.R_PAREN) 2559 2560 if self._match(TokenType.L_PAREN): 2561 method = self._parse_var() 2562 seed = self._match(TokenType.COMMA) and self._parse_number() 2563 self._match_r_paren() 2564 elif self._match_texts(("SEED", "REPEATABLE")): 2565 seed = self._parse_wrapped(self._parse_number) 2566 2567 return self.expression( 2568 exp.TableSample, 2569 method=method, 2570 bucket_numerator=bucket_numerator, 2571 bucket_denominator=bucket_denominator, 2572 bucket_field=bucket_field, 2573 percent=percent, 2574 rows=rows, 2575 size=size, 2576 seed=seed, 2577 kind=kind, 2578 ) 2579 2580 def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]: 2581 return list(iter(self._parse_pivot, None)) or None 2582 2583 def _parse_joins(self) -> t.Optional[t.List[exp.Join]]: 2584 return list(iter(self._parse_join, None)) or None 2585 2586 # https://duckdb.org/docs/sql/statements/pivot 2587 def _parse_simplified_pivot(self) -> exp.Pivot: 2588 def _parse_on() -> t.Optional[exp.Expression]: 2589 this = self._parse_bitwise() 2590 return self._parse_in(this) if self._match(TokenType.IN) else this 2591 2592 this = self._parse_table() 2593 expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on) 2594 using = self._match(TokenType.USING) and self._parse_csv( 2595 lambda: self._parse_alias(self._parse_function()) 2596 ) 2597 group = self._parse_group() 2598 return self.expression( 2599 exp.Pivot, this=this, expressions=expressions, using=using, group=group 2600 ) 2601 2602
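# Illustrative sketch of the simplified, DuckDB-style PIVOT handled above
# (assuming sqlglot's public API; statement and tree shape approximate):
#   import sqlglot
#   piv = sqlglot.parse_one(
#       "SELECT * FROM (PIVOT cities ON year USING SUM(population))",
#       read="duckdb",
#   ).find(sqlglot.exp.Pivot)
#   # roughly Pivot(this=Table(cities), expressions=[Column(year)],
#   #         using=[Sum(this=Column(population))])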
def _parse_pivot(self) -> t.Optional[exp.Pivot]: 2603 index = self._index 2604 2605 if self._match(TokenType.PIVOT): 2606 unpivot = False 2607 elif self._match(TokenType.UNPIVOT): 2608 unpivot = True 2609 else: 2610 return None 2611 2612 expressions = [] 2613 field = None 2614 2615 if not self._match(TokenType.L_PAREN): 2616 self._retreat(index) 2617 return None 2618 2619 if unpivot: 2620 expressions = self._parse_csv(self._parse_column) 2621 else: 2622 expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function())) 2623 2624 if not expressions: 2625 self.raise_error("Failed to parse PIVOT's aggregation list") 2626 2627 if not self._match(TokenType.FOR): 2628 self.raise_error("Expecting FOR") 2629 2630 value = self._parse_column() 2631 2632 if not self._match(TokenType.IN): 2633 self.raise_error("Expecting IN") 2634 2635 field = self._parse_in(value, alias=True) 2636 2637 self._match_r_paren() 2638 2639 pivot = self.expression(exp.Pivot, expressions=expressions, field=field, unpivot=unpivot) 2640 2641 if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False): 2642 pivot.set("alias", self._parse_table_alias()) 2643 2644 if not unpivot: 2645 names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions)) 2646 2647 columns: t.List[exp.Expression] = [] 2648 for fld in pivot.args["field"].expressions: 2649 field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name 2650 for name in names: 2651 if self.PREFIXED_PIVOT_COLUMNS: 2652 name = f"{name}_{field_name}" if name else field_name 2653 else: 2654 name = f"{field_name}_{name}" if name else field_name 2655 2656 columns.append(exp.to_identifier(name)) 2657 2658 pivot.set("columns", columns) 2659 2660 return pivot 2661 2662 def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]: 2663 return [agg.alias for agg in aggregations] 2664 2665 def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]: 2666 if not skip_where_token and not self._match(TokenType.WHERE): 2667 return None 2668 2669 return self.expression( 2670 exp.Where, comments=self._prev_comments, this=self._parse_conjunction() 2671 ) 2672 2673 def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]: 2674 if not skip_group_by_token and not self._match(TokenType.GROUP_BY): 2675 return None 2676 2677 elements = defaultdict(list) 2678 2679 if self._match(TokenType.ALL): 2680 return self.expression(exp.Group, all=True) 2681 2682 while True: 2683 expressions = self._parse_csv(self._parse_conjunction) 2684 if expressions: 2685 elements["expressions"].extend(expressions) 2686 2687 grouping_sets = self._parse_grouping_sets() 2688 if grouping_sets: 2689 elements["grouping_sets"].extend(grouping_sets) 2690 2691 rollup = None 2692 cube = None 2693 totals = None 2694 2695 with_ = self._match(TokenType.WITH) 2696 if self._match(TokenType.ROLLUP): 2697 rollup = with_ or self._parse_wrapped_csv(self._parse_column) 2698 elements["rollup"].extend(ensure_list(rollup)) 2699 2700 if self._match(TokenType.CUBE): 2701 cube = with_ or self._parse_wrapped_csv(self._parse_column) 2702 elements["cube"].extend(ensure_list(cube)) 2703 2704 if self._match_text_seq("TOTALS"): 2705 totals = True 2706 elements["totals"] = True # type: ignore 2707 2708 if not (grouping_sets or rollup or cube or totals): 2709 break 2710 2711 return self.expression(exp.Group, **elements) # type: ignore 2712 2713 def _parse_grouping_sets(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]: 2714 if 
not self._match(TokenType.GROUPING_SETS): 2715 return None 2716 2717 return self._parse_wrapped_csv(self._parse_grouping_set) 2718 2719 def _parse_grouping_set(self) -> t.Optional[exp.Expression]: 2720 if self._match(TokenType.L_PAREN): 2721 grouping_set = self._parse_csv(self._parse_column) 2722 self._match_r_paren() 2723 return self.expression(exp.Tuple, expressions=grouping_set) 2724 2725 return self._parse_column() 2726 2727 def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]: 2728 if not skip_having_token and not self._match(TokenType.HAVING): 2729 return None 2730 return self.expression(exp.Having, this=self._parse_conjunction()) 2731 2732 def _parse_qualify(self) -> t.Optional[exp.Qualify]: 2733 if not self._match(TokenType.QUALIFY): 2734 return None 2735 return self.expression(exp.Qualify, this=self._parse_conjunction()) 2736 2737 def _parse_order( 2738 self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False 2739 ) -> t.Optional[exp.Expression]: 2740 if not skip_order_token and not self._match(TokenType.ORDER_BY): 2741 return this 2742 2743 return self.expression( 2744 exp.Order, this=this, expressions=self._parse_csv(self._parse_ordered) 2745 ) 2746 2747 def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]: 2748 if not self._match(token): 2749 return None 2750 return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered)) 2751 2752 def _parse_ordered(self) -> exp.Ordered: 2753 this = self._parse_conjunction() 2754 self._match(TokenType.ASC) 2755 2756 is_desc = self._match(TokenType.DESC) 2757 is_nulls_first = self._match_text_seq("NULLS", "FIRST") 2758 is_nulls_last = self._match_text_seq("NULLS", "LAST") 2759 desc = is_desc or False 2760 asc = not desc 2761 nulls_first = is_nulls_first or False 2762 explicitly_null_ordered = is_nulls_first or is_nulls_last 2763 2764 if ( 2765 not explicitly_null_ordered 2766 and ( 2767 (asc and self.NULL_ORDERING == "nulls_are_small") 2768 or (desc and self.NULL_ORDERING != "nulls_are_small") 2769 ) 2770 and self.NULL_ORDERING != "nulls_are_last" 2771 ): 2772 nulls_first = True 2773 2774 return self.expression(exp.Ordered, this=this, desc=desc, nulls_first=nulls_first) 2775 2776 def _parse_limit( 2777 self, this: t.Optional[exp.Expression] = None, top: bool = False 2778 ) -> t.Optional[exp.Expression]: 2779 if self._match(TokenType.TOP if top else TokenType.LIMIT): 2780 comments = self._prev_comments 2781 if top: 2782 limit_paren = self._match(TokenType.L_PAREN) 2783 expression = self._parse_number() 2784 2785 if limit_paren: 2786 self._match_r_paren() 2787 else: 2788 expression = self._parse_term() 2789 2790 if self._match(TokenType.COMMA): 2791 offset = expression 2792 expression = self._parse_term() 2793 else: 2794 offset = None 2795 2796 limit_exp = self.expression( 2797 exp.Limit, this=this, expression=expression, offset=offset, comments=comments 2798 ) 2799 2800 return limit_exp 2801 2802 if self._match(TokenType.FETCH): 2803 direction = self._match_set((TokenType.FIRST, TokenType.NEXT)) 2804 direction = self._prev.text if direction else "FIRST" 2805 2806 count = self._parse_number() 2807 percent = self._match(TokenType.PERCENT) 2808 2809 self._match_set((TokenType.ROW, TokenType.ROWS)) 2810 2811 only = self._match_text_seq("ONLY") 2812 with_ties = self._match_text_seq("WITH", "TIES") 2813 2814 if only and with_ties: 2815 self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause") 2816 2817 return self.expression( 2818 
exp.Fetch, 2819 direction=direction, 2820 count=count, 2821 percent=percent, 2822 with_ties=with_ties, 2823 ) 2824 2825 return this 2826 2827 def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 2828 if not self._match(TokenType.OFFSET): 2829 return this 2830 2831 count = self._parse_term() 2832 self._match_set((TokenType.ROW, TokenType.ROWS)) 2833 return self.expression(exp.Offset, this=this, expression=count) 2834 2835 def _parse_locks(self) -> t.List[exp.Lock]: 2836 locks = [] 2837 while True: 2838 if self._match_text_seq("FOR", "UPDATE"): 2839 update = True 2840 elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq( 2841 "LOCK", "IN", "SHARE", "MODE" 2842 ): 2843 update = False 2844 else: 2845 break 2846 2847 expressions = None 2848 if self._match_text_seq("OF"): 2849 expressions = self._parse_csv(lambda: self._parse_table(schema=True)) 2850 2851 wait: t.Optional[bool | exp.Expression] = None 2852 if self._match_text_seq("NOWAIT"): 2853 wait = True 2854 elif self._match_text_seq("WAIT"): 2855 wait = self._parse_primary() 2856 elif self._match_text_seq("SKIP", "LOCKED"): 2857 wait = False 2858 2859 locks.append( 2860 self.expression(exp.Lock, update=update, expressions=expressions, wait=wait) 2861 ) 2862 2863 return locks 2864 2865 def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 2866 if not self._match_set(self.SET_OPERATIONS): 2867 return this 2868 2869 token_type = self._prev.token_type 2870 2871 if token_type == TokenType.UNION: 2872 expression = exp.Union 2873 elif token_type == TokenType.EXCEPT: 2874 expression = exp.Except 2875 else: 2876 expression = exp.Intersect 2877 2878 return self.expression( 2879 expression, 2880 this=this, 2881 distinct=self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL), 2882 expression=self._parse_set_operations(self._parse_select(nested=True)), 2883 ) 2884 2885 def _parse_expression(self) -> t.Optional[exp.Expression]: 2886 return self._parse_alias(self._parse_conjunction()) 2887 2888 def _parse_conjunction(self) -> t.Optional[exp.Expression]: 2889 return self._parse_tokens(self._parse_equality, self.CONJUNCTION) 2890 2891 def _parse_equality(self) -> t.Optional[exp.Expression]: 2892 return self._parse_tokens(self._parse_comparison, self.EQUALITY) 2893 2894 def _parse_comparison(self) -> t.Optional[exp.Expression]: 2895 return self._parse_tokens(self._parse_range, self.COMPARISON) 2896 2897 def _parse_range(self) -> t.Optional[exp.Expression]: 2898 this = self._parse_bitwise() 2899 negate = self._match(TokenType.NOT) 2900 2901 if self._match_set(self.RANGE_PARSERS): 2902 expression = self.RANGE_PARSERS[self._prev.token_type](self, this) 2903 if not expression: 2904 return this 2905 2906 this = expression 2907 elif self._match(TokenType.ISNULL): 2908 this = self.expression(exp.Is, this=this, expression=exp.Null()) 2909 2910 # Postgres supports ISNULL and NOTNULL for conditions. 
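# For example, "x ISNULL" parses like "x IS NULL" (an exp.Is over exp.Null),
# and "x NOTNULL" parses like "NOT x IS NULL", built just below.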
2911 # https://blog.andreiavram.ro/postgresql-null-composite-type/ 2912 if self._match(TokenType.NOTNULL): 2913 this = self.expression(exp.Is, this=this, expression=exp.Null()) 2914 this = self.expression(exp.Not, this=this) 2915 2916 if negate: 2917 this = self.expression(exp.Not, this=this) 2918 2919 if self._match(TokenType.IS): 2920 this = self._parse_is(this) 2921 2922 return this 2923 2924 def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 2925 index = self._index - 1 2926 negate = self._match(TokenType.NOT) 2927 2928 if self._match_text_seq("DISTINCT", "FROM"): 2929 klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ 2930 return self.expression(klass, this=this, expression=self._parse_expression()) 2931 2932 expression = self._parse_null() or self._parse_boolean() 2933 if not expression: 2934 self._retreat(index) 2935 return None 2936 2937 this = self.expression(exp.Is, this=this, expression=expression) 2938 return self.expression(exp.Not, this=this) if negate else this 2939 2940 def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In: 2941 unnest = self._parse_unnest(with_alias=False) 2942 if unnest: 2943 this = self.expression(exp.In, this=this, unnest=unnest) 2944 elif self._match(TokenType.L_PAREN): 2945 expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias)) 2946 2947 if len(expressions) == 1 and isinstance(expressions[0], exp.Subqueryable): 2948 this = self.expression(exp.In, this=this, query=expressions[0]) 2949 else: 2950 this = self.expression(exp.In, this=this, expressions=expressions) 2951 2952 self._match_r_paren(this) 2953 else: 2954 this = self.expression(exp.In, this=this, field=self._parse_field()) 2955 2956 return this 2957 2958 def _parse_between(self, this: exp.Expression) -> exp.Between: 2959 low = self._parse_bitwise() 2960 self._match(TokenType.AND) 2961 high = self._parse_bitwise() 2962 return self.expression(exp.Between, this=this, low=low, high=high) 2963 2964 def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 2965 if not self._match(TokenType.ESCAPE): 2966 return this 2967 return self.expression(exp.Escape, this=this, expression=self._parse_string()) 2968 2969 def _parse_interval(self) -> t.Optional[exp.Interval]: 2970 if not self._match(TokenType.INTERVAL): 2971 return None 2972 2973 if self._match(TokenType.STRING, advance=False): 2974 this = self._parse_primary() 2975 else: 2976 this = self._parse_term() 2977 2978 unit = self._parse_function() or self._parse_var() 2979 2980 # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse 2981 # each INTERVAL expression into this canonical form so it's easy to transpile 2982 if this and this.is_number: 2983 this = exp.Literal.string(this.name) 2984 elif this and this.is_string: 2985 parts = this.name.split() 2986 2987 if len(parts) == 2: 2988 if unit: 2989 # this is not actually a unit, it's something else 2990 unit = None 2991 self._retreat(self._index - 1) 2992 else: 2993 this = exp.Literal.string(parts[0]) 2994 unit = self.expression(exp.Var, this=parts[1]) 2995 2996 return self.expression(exp.Interval, this=this, unit=unit) 2997 2998 def _parse_bitwise(self) -> t.Optional[exp.Expression]: 2999 this = self._parse_term() 3000 3001 while True: 3002 if self._match_set(self.BITWISE): 3003 this = self.expression( 3004 self.BITWISE[self._prev.token_type], this=this, expression=self._parse_term() 3005 ) 3006 elif self._match_pair(TokenType.LT, TokenType.LT): 3007 
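# "<<" and ">>" arrive as two consecutive LT/GT tokens, so the shifts are
# recognized by pairing them; e.g. "1 << 2" roughly yields
# exp.BitwiseLeftShift(this=Literal(1), expression=Literal(2)).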
this = self.expression( 3008 exp.BitwiseLeftShift, this=this, expression=self._parse_term() 3009 ) 3010 elif self._match_pair(TokenType.GT, TokenType.GT): 3011 this = self.expression( 3012 exp.BitwiseRightShift, this=this, expression=self._parse_term() 3013 ) 3014 else: 3015 break 3016 3017 return this 3018 3019 def _parse_term(self) -> t.Optional[exp.Expression]: 3020 return self._parse_tokens(self._parse_factor, self.TERM) 3021 3022 def _parse_factor(self) -> t.Optional[exp.Expression]: 3023 return self._parse_tokens(self._parse_unary, self.FACTOR) 3024 3025 def _parse_unary(self) -> t.Optional[exp.Expression]: 3026 if self._match_set(self.UNARY_PARSERS): 3027 return self.UNARY_PARSERS[self._prev.token_type](self) 3028 return self._parse_at_time_zone(self._parse_type()) 3029 3030 def _parse_type(self) -> t.Optional[exp.Expression]: 3031 interval = self._parse_interval() 3032 if interval: 3033 return interval 3034 3035 index = self._index 3036 data_type = self._parse_types(check_func=True) 3037 this = self._parse_column() 3038 3039 if data_type: 3040 if isinstance(this, exp.Literal): 3041 parser = self.TYPE_LITERAL_PARSERS.get(data_type.this) 3042 if parser: 3043 return parser(self, this, data_type) 3044 return self.expression(exp.Cast, this=this, to=data_type) 3045 if not data_type.expressions: 3046 self._retreat(index) 3047 return self._parse_column() 3048 return self._parse_column_ops(data_type) 3049 3050 return this 3051 3052 def _parse_type_size(self) -> t.Optional[exp.DataTypeSize]: 3053 this = self._parse_type() 3054 if not this: 3055 return None 3056 3057 return self.expression( 3058 exp.DataTypeSize, this=this, expression=self._parse_var(any_token=True) 3059 ) 3060 3061 def _parse_types( 3062 self, check_func: bool = False, schema: bool = False 3063 ) -> t.Optional[exp.Expression]: 3064 index = self._index 3065 3066 prefix = self._match_text_seq("SYSUDTLIB", ".") 3067 3068 if not self._match_set(self.TYPE_TOKENS): 3069 return None 3070 3071 type_token = self._prev.token_type 3072 3073 if type_token == TokenType.PSEUDO_TYPE: 3074 return self.expression(exp.PseudoType, this=self._prev.text) 3075 3076 nested = type_token in self.NESTED_TYPE_TOKENS 3077 is_struct = type_token == TokenType.STRUCT 3078 expressions = None 3079 maybe_func = False 3080 3081 if self._match(TokenType.L_PAREN): 3082 if is_struct: 3083 expressions = self._parse_csv(self._parse_struct_types) 3084 elif nested: 3085 expressions = self._parse_csv( 3086 lambda: self._parse_types(check_func=check_func, schema=schema) 3087 ) 3088 elif type_token in self.ENUM_TYPE_TOKENS: 3089 expressions = self._parse_csv(self._parse_primary) 3090 else: 3091 expressions = self._parse_csv(self._parse_type_size) 3092 3093 if not expressions or not self._match(TokenType.R_PAREN): 3094 self._retreat(index) 3095 return None 3096 3097 maybe_func = True 3098 3099 if self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET): 3100 this = exp.DataType( 3101 this=exp.DataType.Type.ARRAY, 3102 expressions=[ 3103 exp.DataType( 3104 this=exp.DataType.Type[type_token.value], 3105 expressions=expressions, 3106 nested=nested, 3107 ) 3108 ], 3109 nested=True, 3110 ) 3111 3112 while self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET): 3113 this = exp.DataType(this=exp.DataType.Type.ARRAY, expressions=[this], nested=True) 3114 3115 return this 3116 3117 if self._match(TokenType.L_BRACKET): 3118 self._retreat(index) 3119 return None 3120 3121 values: t.Optional[t.List[t.Optional[exp.Expression]]] = None 3122 if nested and 
self._match(TokenType.LT): 3123 if is_struct: 3124 expressions = self._parse_csv(self._parse_struct_types) 3125 else: 3126 expressions = self._parse_csv( 3127 lambda: self._parse_types(check_func=check_func, schema=schema) 3128 ) 3129 3130 if not self._match(TokenType.GT): 3131 self.raise_error("Expecting >") 3132 3133 if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)): 3134 values = self._parse_csv(self._parse_conjunction) 3135 self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN)) 3136 3137 value: t.Optional[exp.Expression] = None 3138 if type_token in self.TIMESTAMPS: 3139 if self._match_text_seq("WITH", "TIME", "ZONE"): 3140 maybe_func = False 3141 value = exp.DataType(this=exp.DataType.Type.TIMESTAMPTZ, expressions=expressions) 3142 elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"): 3143 maybe_func = False 3144 value = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions) 3145 elif self._match_text_seq("WITHOUT", "TIME", "ZONE"): 3146 maybe_func = False 3147 elif type_token == TokenType.INTERVAL: 3148 unit = self._parse_var() 3149 3150 if not unit: 3151 value = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL) 3152 else: 3153 value = self.expression(exp.Interval, unit=unit) 3154 3155 if maybe_func and check_func: 3156 index2 = self._index 3157 peek = self._parse_string() 3158 3159 if not peek: 3160 self._retreat(index) 3161 return None 3162 3163 self._retreat(index2) 3164 3165 if value: 3166 return value 3167 3168 return exp.DataType( 3169 this=exp.DataType.Type[type_token.value], 3170 expressions=expressions, 3171 nested=nested, 3172 values=values, 3173 prefix=prefix, 3174 ) 3175 3176 def _parse_struct_types(self) -> t.Optional[exp.Expression]: 3177 this = self._parse_type() or self._parse_id_var() 3178 self._match(TokenType.COLON) 3179 return self._parse_column_def(this) 3180 3181 def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3182 if not self._match_text_seq("AT", "TIME", "ZONE"): 3183 return this 3184 return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary()) 3185 3186 def _parse_column(self) -> t.Optional[exp.Expression]: 3187 this = self._parse_field() 3188 if isinstance(this, exp.Identifier): 3189 this = self.expression(exp.Column, this=this) 3190 elif not this: 3191 return self._parse_bracket(this) 3192 return self._parse_column_ops(this) 3193 3194 def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3195 this = self._parse_bracket(this) 3196 3197 while self._match_set(self.COLUMN_OPERATORS): 3198 op_token = self._prev.token_type 3199 op = self.COLUMN_OPERATORS.get(op_token) 3200 3201 if op_token == TokenType.DCOLON: 3202 field = self._parse_types() 3203 if not field: 3204 self.raise_error("Expected type") 3205 elif op and self._curr: 3206 self._advance() 3207 value = self._prev.text 3208 field = ( 3209 exp.Literal.number(value) 3210 if self._prev.token_type == TokenType.NUMBER 3211 else exp.Literal.string(value) 3212 ) 3213 else: 3214 field = self._parse_field(anonymous_func=True, any_token=True) 3215 3216 if isinstance(field, exp.Func): 3217 # bigquery allows function calls like x.y.count(...) 3218 # SAFE.SUBSTR(...) 
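# For example, "SAFE.SUBSTR(s, 1, 2)" keeps its dotted prefix: the column
# chain parsed so far is rewritten into exp.Dot nodes via
# _replace_columns_with_dots so the function call can hang off it.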
3219 # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules 3220 this = self._replace_columns_with_dots(this) 3221 3222 if op: 3223 this = op(self, this, field) 3224 elif isinstance(this, exp.Column) and not this.args.get("catalog"): 3225 this = self.expression( 3226 exp.Column, 3227 this=field, 3228 table=this.this, 3229 db=this.args.get("table"), 3230 catalog=this.args.get("db"), 3231 ) 3232 else: 3233 this = self.expression(exp.Dot, this=this, expression=field) 3234 this = self._parse_bracket(this) 3235 return this 3236 3237 def _parse_primary(self) -> t.Optional[exp.Expression]: 3238 if self._match_set(self.PRIMARY_PARSERS): 3239 token_type = self._prev.token_type 3240 primary = self.PRIMARY_PARSERS[token_type](self, self._prev) 3241 3242 if token_type == TokenType.STRING: 3243 expressions = [primary] 3244 while self._match(TokenType.STRING): 3245 expressions.append(exp.Literal.string(self._prev.text)) 3246 3247 if len(expressions) > 1: 3248 return self.expression(exp.Concat, expressions=expressions) 3249 3250 return primary 3251 3252 if self._match_pair(TokenType.DOT, TokenType.NUMBER): 3253 return exp.Literal.number(f"0.{self._prev.text}") 3254 3255 if self._match(TokenType.L_PAREN): 3256 comments = self._prev_comments 3257 query = self._parse_select() 3258 3259 if query: 3260 expressions = [query] 3261 else: 3262 expressions = self._parse_expressions() 3263 3264 this = self._parse_query_modifiers(seq_get(expressions, 0)) 3265 3266 if isinstance(this, exp.Subqueryable): 3267 this = self._parse_set_operations( 3268 self._parse_subquery(this=this, parse_alias=False) 3269 ) 3270 elif len(expressions) > 1: 3271 this = self.expression(exp.Tuple, expressions=expressions) 3272 else: 3273 this = self.expression(exp.Paren, this=self._parse_set_operations(this)) 3274 3275 if this: 3276 this.add_comments(comments) 3277 3278 self._match_r_paren(expression=this) 3279 return this 3280 3281 return None 3282 3283 def _parse_field( 3284 self, 3285 any_token: bool = False, 3286 tokens: t.Optional[t.Collection[TokenType]] = None, 3287 anonymous_func: bool = False, 3288 ) -> t.Optional[exp.Expression]: 3289 return ( 3290 self._parse_primary() 3291 or self._parse_function(anonymous=anonymous_func) 3292 or self._parse_id_var(any_token=any_token, tokens=tokens) 3293 ) 3294 3295 def _parse_function( 3296 self, 3297 functions: t.Optional[t.Dict[str, t.Callable]] = None, 3298 anonymous: bool = False, 3299 optional_parens: bool = True, 3300 ) -> t.Optional[exp.Expression]: 3301 if not self._curr: 3302 return None 3303 3304 token_type = self._curr.token_type 3305 3306 if optional_parens and self._match_set(self.NO_PAREN_FUNCTION_PARSERS): 3307 return self.NO_PAREN_FUNCTION_PARSERS[token_type](self) 3308 3309 if not self._next or self._next.token_type != TokenType.L_PAREN: 3310 if optional_parens and token_type in self.NO_PAREN_FUNCTIONS: 3311 self._advance() 3312 return self.expression(self.NO_PAREN_FUNCTIONS[token_type]) 3313 3314 return None 3315 3316 if token_type not in self.FUNC_TOKENS: 3317 return None 3318 3319 this = self._curr.text 3320 upper = this.upper() 3321 self._advance(2) 3322 3323 parser = self.FUNCTION_PARSERS.get(upper) 3324 3325 if parser and not anonymous: 3326 this = parser(self) 3327 else: 3328 subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type) 3329 3330 if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH): 3331 this = self.expression(subquery_predicate, this=self._parse_select()) 3332 
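# e.g. "EXISTS (SELECT 1 FROM t)" roughly yields exp.Exists(this=Select(...));
# consume the closing paren and return early, skipping the normal argument
# parsing below.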
self._match_r_paren() 3333 return this 3334 3335 if functions is None: 3336 functions = self.FUNCTIONS 3337 3338 function = functions.get(upper) 3339 3340 alias = upper in self.FUNCTIONS_WITH_ALIASED_ARGS 3341 args = self._parse_csv(lambda: self._parse_lambda(alias=alias)) 3342 3343 if function and not anonymous: 3344 this = self.validate_expression(function(args), args) 3345 else: 3346 this = self.expression(exp.Anonymous, this=this, expressions=args) 3347 3348 self._match(TokenType.R_PAREN, expression=this) 3349 return self._parse_window(this) 3350 3351 def _parse_function_parameter(self) -> t.Optional[exp.Expression]: 3352 return self._parse_column_def(self._parse_id_var()) 3353 3354 def _parse_user_defined_function( 3355 self, kind: t.Optional[TokenType] = None 3356 ) -> t.Optional[exp.Expression]: 3357 this = self._parse_id_var() 3358 3359 while self._match(TokenType.DOT): 3360 this = self.expression(exp.Dot, this=this, expression=self._parse_id_var()) 3361 3362 if not self._match(TokenType.L_PAREN): 3363 return this 3364 3365 expressions = self._parse_csv(self._parse_function_parameter) 3366 self._match_r_paren() 3367 return self.expression( 3368 exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True 3369 ) 3370 3371 def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier: 3372 literal = self._parse_primary() 3373 if literal: 3374 return self.expression(exp.Introducer, this=token.text, expression=literal) 3375 3376 return self.expression(exp.Identifier, this=token.text) 3377 3378 def _parse_session_parameter(self) -> exp.SessionParameter: 3379 kind = None 3380 this = self._parse_id_var() or self._parse_primary() 3381 3382 if this and self._match(TokenType.DOT): 3383 kind = this.name 3384 this = self._parse_var() or self._parse_primary() 3385 3386 return self.expression(exp.SessionParameter, this=this, kind=kind) 3387 3388 def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]: 3389 index = self._index 3390 3391 if self._match(TokenType.L_PAREN): 3392 expressions = self._parse_csv(self._parse_id_var) 3393 3394 if not self._match(TokenType.R_PAREN): 3395 self._retreat(index) 3396 else: 3397 expressions = [self._parse_id_var()] 3398 3399 if self._match_set(self.LAMBDAS): 3400 return self.LAMBDAS[self._prev.token_type](self, expressions) 3401 3402 self._retreat(index) 3403 3404 this: t.Optional[exp.Expression] 3405 3406 if self._match(TokenType.DISTINCT): 3407 this = self.expression( 3408 exp.Distinct, expressions=self._parse_csv(self._parse_conjunction) 3409 ) 3410 else: 3411 this = self._parse_select_or_expression(alias=alias) 3412 3413 return self._parse_limit(self._parse_order(self._parse_respect_or_ignore_nulls(this))) 3414 3415 def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 3416 index = self._index 3417 3418 if not self.errors: 3419 try: 3420 if self._parse_select(nested=True): 3421 return this 3422 except ParseError: 3423 pass 3424 finally: 3425 self.errors.clear() 3426 self._retreat(index) 3427 3428 if not self._match(TokenType.L_PAREN): 3429 return this 3430 3431 args = self._parse_csv( 3432 lambda: self._parse_constraint() 3433 or self._parse_column_def(self._parse_field(any_token=True)) 3434 ) 3435 3436 self._match_r_paren() 3437 return self.expression(exp.Schema, this=this, expressions=args) 3438 3439 def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3440 # column defs are not really columns, they're identifiers 3441 if 
isinstance(this, exp.Column): 3442 this = this.this 3443 3444 kind = self._parse_types(schema=True) 3445 3446 if self._match_text_seq("FOR", "ORDINALITY"): 3447 return self.expression(exp.ColumnDef, this=this, ordinality=True) 3448 3449 constraints = [] 3450 while True: 3451 constraint = self._parse_column_constraint() 3452 if not constraint: 3453 break 3454 constraints.append(constraint) 3455 3456 if not kind and not constraints: 3457 return this 3458 3459 return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints) 3460 3461 def _parse_auto_increment( 3462 self, 3463 ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint: 3464 start = None 3465 increment = None 3466 3467 if self._match(TokenType.L_PAREN, advance=False): 3468 args = self._parse_wrapped_csv(self._parse_bitwise) 3469 start = seq_get(args, 0) 3470 increment = seq_get(args, 1) 3471 elif self._match_text_seq("START"): 3472 start = self._parse_bitwise() 3473 self._match_text_seq("INCREMENT") 3474 increment = self._parse_bitwise() 3475 3476 if start and increment: 3477 return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment) 3478 3479 return exp.AutoIncrementColumnConstraint() 3480 3481 def _parse_compress(self) -> exp.CompressColumnConstraint: 3482 if self._match(TokenType.L_PAREN, advance=False): 3483 return self.expression( 3484 exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise) 3485 ) 3486 3487 return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise()) 3488 3489 def _parse_generated_as_identity(self) -> exp.GeneratedAsIdentityColumnConstraint: 3490 if self._match_text_seq("BY", "DEFAULT"): 3491 on_null = self._match_pair(TokenType.ON, TokenType.NULL) 3492 this = self.expression( 3493 exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null 3494 ) 3495 else: 3496 self._match_text_seq("ALWAYS") 3497 this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True) 3498 3499 self._match(TokenType.ALIAS) 3500 identity = self._match_text_seq("IDENTITY") 3501 3502 if self._match(TokenType.L_PAREN): 3503 if self._match_text_seq("START", "WITH"): 3504 this.set("start", self._parse_bitwise()) 3505 if self._match_text_seq("INCREMENT", "BY"): 3506 this.set("increment", self._parse_bitwise()) 3507 if self._match_text_seq("MINVALUE"): 3508 this.set("minvalue", self._parse_bitwise()) 3509 if self._match_text_seq("MAXVALUE"): 3510 this.set("maxvalue", self._parse_bitwise()) 3511 3512 if self._match_text_seq("CYCLE"): 3513 this.set("cycle", True) 3514 elif self._match_text_seq("NO", "CYCLE"): 3515 this.set("cycle", False) 3516 3517 if not identity: 3518 this.set("expression", self._parse_bitwise()) 3519 3520 self._match_r_paren() 3521 3522 return this 3523 3524 def _parse_inline(self) -> exp.InlineLengthColumnConstraint: 3525 self._match_text_seq("LENGTH") 3526 return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise()) 3527 3528 def _parse_not_constraint( 3529 self, 3530 ) -> t.Optional[exp.NotNullColumnConstraint | exp.CaseSpecificColumnConstraint]: 3531 if self._match_text_seq("NULL"): 3532 return self.expression(exp.NotNullColumnConstraint) 3533 if self._match_text_seq("CASESPECIFIC"): 3534 return self.expression(exp.CaseSpecificColumnConstraint, not_=True) 3535 return None 3536 3537 def _parse_column_constraint(self) -> t.Optional[exp.Expression]: 3538 if self._match(TokenType.CONSTRAINT): 3539 this = self._parse_id_var() 3540 else: 3541 this = None 3542 3543 if 
self._match_texts(self.CONSTRAINT_PARSERS): 3544 return self.expression( 3545 exp.ColumnConstraint, 3546 this=this, 3547 kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self), 3548 ) 3549 3550 return this 3551 3552 def _parse_constraint(self) -> t.Optional[exp.Expression]: 3553 if not self._match(TokenType.CONSTRAINT): 3554 return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS) 3555 3556 this = self._parse_id_var() 3557 expressions = [] 3558 3559 while True: 3560 constraint = self._parse_unnamed_constraint() or self._parse_function() 3561 if not constraint: 3562 break 3563 expressions.append(constraint) 3564 3565 return self.expression(exp.Constraint, this=this, expressions=expressions) 3566 3567 def _parse_unnamed_constraint( 3568 self, constraints: t.Optional[t.Collection[str]] = None 3569 ) -> t.Optional[exp.Expression]: 3570 if not self._match_texts(constraints or self.CONSTRAINT_PARSERS): 3571 return None 3572 3573 constraint = self._prev.text.upper() 3574 if constraint not in self.CONSTRAINT_PARSERS: 3575 self.raise_error(f"No parser found for schema constraint {constraint}.") 3576 3577 return self.CONSTRAINT_PARSERS[constraint](self) 3578 3579 def _parse_unique(self) -> exp.UniqueColumnConstraint: 3580 self._match_text_seq("KEY") 3581 return self.expression( 3582 exp.UniqueColumnConstraint, this=self._parse_schema(self._parse_id_var(any_token=False)) 3583 ) 3584 3585 def _parse_key_constraint_options(self) -> t.List[str]: 3586 options = [] 3587 while True: 3588 if not self._curr: 3589 break 3590 3591 if self._match(TokenType.ON): 3592 action = None 3593 on = self._advance_any() and self._prev.text 3594 3595 if self._match_text_seq("NO", "ACTION"): 3596 action = "NO ACTION" 3597 elif self._match_text_seq("CASCADE"): 3598 action = "CASCADE" 3599 elif self._match_pair(TokenType.SET, TokenType.NULL): 3600 action = "SET NULL" 3601 elif self._match_pair(TokenType.SET, TokenType.DEFAULT): 3602 action = "SET DEFAULT" 3603 else: 3604 self.raise_error("Invalid key constraint") 3605 3606 options.append(f"ON {on} {action}") 3607 elif self._match_text_seq("NOT", "ENFORCED"): 3608 options.append("NOT ENFORCED") 3609 elif self._match_text_seq("DEFERRABLE"): 3610 options.append("DEFERRABLE") 3611 elif self._match_text_seq("INITIALLY", "DEFERRED"): 3612 options.append("INITIALLY DEFERRED") 3613 elif self._match_text_seq("NORELY"): 3614 options.append("NORELY") 3615 elif self._match_text_seq("MATCH", "FULL"): 3616 options.append("MATCH FULL") 3617 else: 3618 break 3619 3620 return options 3621 3622 def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]: 3623 if match and not self._match(TokenType.REFERENCES): 3624 return None 3625 3626 expressions = None 3627 this = self._parse_table(schema=True) 3628 options = self._parse_key_constraint_options() 3629 return self.expression(exp.Reference, this=this, expressions=expressions, options=options) 3630 3631 def _parse_foreign_key(self) -> exp.ForeignKey: 3632 expressions = self._parse_wrapped_id_vars() 3633 reference = self._parse_references() 3634 options = {} 3635 3636 while self._match(TokenType.ON): 3637 if not self._match_set((TokenType.DELETE, TokenType.UPDATE)): 3638 self.raise_error("Expected DELETE or UPDATE") 3639 3640 kind = self._prev.text.lower() 3641 3642 if self._match_text_seq("NO", "ACTION"): 3643 action = "NO ACTION" 3644 elif self._match(TokenType.SET): 3645 self._match_set((TokenType.NULL, TokenType.DEFAULT)) 3646 action = "SET " + self._prev.text.upper() 3647 else: 3648 
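# Fallback for other single-token actions (e.g. RESTRICT): consume one token
# and use its text verbatim as the action.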
self._advance() 3649 action = self._prev.text.upper() 3650 3651 options[kind] = action 3652 3653 return self.expression( 3654 exp.ForeignKey, expressions=expressions, reference=reference, **options # type: ignore 3655 ) 3656 3657 def _parse_primary_key( 3658 self, wrapped_optional: bool = False, in_props: bool = False 3659 ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey: 3660 desc = ( 3661 self._match_set((TokenType.ASC, TokenType.DESC)) 3662 and self._prev.token_type == TokenType.DESC 3663 ) 3664 3665 if not in_props and not self._match(TokenType.L_PAREN, advance=False): 3666 return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc) 3667 3668 expressions = self._parse_wrapped_csv(self._parse_field, optional=wrapped_optional) 3669 options = self._parse_key_constraint_options() 3670 return self.expression(exp.PrimaryKey, expressions=expressions, options=options) 3671 3672 def _parse_bracket(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3673 if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)): 3674 return this 3675 3676 bracket_kind = self._prev.token_type 3677 3678 if self._match(TokenType.COLON): 3679 expressions: t.List[t.Optional[exp.Expression]] = [ 3680 self.expression(exp.Slice, expression=self._parse_conjunction()) 3681 ] 3682 else: 3683 expressions = self._parse_csv(lambda: self._parse_slice(self._parse_conjunction())) 3684 3685 # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs 3686 if bracket_kind == TokenType.L_BRACE: 3687 this = self.expression(exp.Struct, expressions=expressions) 3688 elif not this or this.name.upper() == "ARRAY": 3689 this = self.expression(exp.Array, expressions=expressions) 3690 else: 3691 expressions = apply_index_offset(this, expressions, -self.INDEX_OFFSET) 3692 this = self.expression(exp.Bracket, this=this, expressions=expressions) 3693 3694 if not self._match(TokenType.R_BRACKET) and bracket_kind == TokenType.L_BRACKET: 3695 self.raise_error("Expected ]") 3696 elif not self._match(TokenType.R_BRACE) and bracket_kind == TokenType.L_BRACE: 3697 self.raise_error("Expected }") 3698 3699 self._add_comments(this) 3700 return self._parse_bracket(this) 3701 3702 def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3703 if self._match(TokenType.COLON): 3704 return self.expression(exp.Slice, this=this, expression=self._parse_conjunction()) 3705 return this 3706 3707 def _parse_case(self) -> t.Optional[exp.Expression]: 3708 ifs = [] 3709 default = None 3710 3711 expression = self._parse_conjunction() 3712 3713 while self._match(TokenType.WHEN): 3714 this = self._parse_conjunction() 3715 self._match(TokenType.THEN) 3716 then = self._parse_conjunction() 3717 ifs.append(self.expression(exp.If, this=this, true=then)) 3718 3719 if self._match(TokenType.ELSE): 3720 default = self._parse_conjunction() 3721 3722 if not self._match(TokenType.END): 3723 self.raise_error("Expected END after CASE", self._prev) 3724 3725 return self._parse_window( 3726 self.expression(exp.Case, this=expression, ifs=ifs, default=default) 3727 ) 3728 3729 def _parse_if(self) -> t.Optional[exp.Expression]: 3730 if self._match(TokenType.L_PAREN): 3731 args = self._parse_csv(self._parse_conjunction) 3732 this = self.validate_expression(exp.If.from_arg_list(args), args) 3733 self._match_r_paren() 3734 else: 3735 index = self._index - 1 3736 condition = self._parse_conjunction() 3737 3738 if not condition: 3739 self._retreat(index) 3740 return None 3741 3742 self._match(TokenType.THEN) 3743 true = 
self._parse_conjunction() 3744 false = self._parse_conjunction() if self._match(TokenType.ELSE) else None 3745 self._match(TokenType.END) 3746 this = self.expression(exp.If, this=condition, true=true, false=false) 3747 3748 return self._parse_window(this) 3749 3750 def _parse_extract(self) -> exp.Extract: 3751 this = self._parse_function() or self._parse_var() or self._parse_type() 3752 3753 if self._match(TokenType.FROM): 3754 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 3755 3756 if not self._match(TokenType.COMMA): 3757 self.raise_error("Expected FROM or comma after EXTRACT", self._prev) 3758 3759 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 3760 3761 def _parse_any_value(self) -> exp.AnyValue: 3762 this = self._parse_lambda() 3763 is_max = None 3764 having = None 3765 3766 if self._match(TokenType.HAVING): 3767 self._match_texts(("MAX", "MIN")) 3768 is_max = self._prev.text == "MAX" 3769 having = self._parse_column() 3770 3771 return self.expression(exp.AnyValue, this=this, having=having, max=is_max) 3772 3773 def _parse_cast(self, strict: bool) -> exp.Expression: 3774 this = self._parse_conjunction() 3775 3776 if not self._match(TokenType.ALIAS): 3777 if self._match(TokenType.COMMA): 3778 return self.expression( 3779 exp.CastToStrType, this=this, expression=self._parse_string() 3780 ) 3781 else: 3782 self.raise_error("Expected AS after CAST") 3783 3784 fmt = None 3785 to = self._parse_types() 3786 3787 if not to: 3788 self.raise_error("Expected TYPE after CAST") 3789 elif to.this == exp.DataType.Type.CHAR: 3790 if self._match(TokenType.CHARACTER_SET): 3791 to = self.expression(exp.CharacterSet, this=self._parse_var_or_string()) 3792 elif self._match(TokenType.FORMAT): 3793 fmt_string = self._parse_string() 3794 fmt = self._parse_at_time_zone(fmt_string) 3795 3796 if to.this in exp.DataType.TEMPORAL_TYPES: 3797 this = self.expression( 3798 exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime, 3799 this=this, 3800 format=exp.Literal.string( 3801 format_time( 3802 fmt_string.this if fmt_string else "", 3803 self.FORMAT_MAPPING or self.TIME_MAPPING, 3804 self.FORMAT_TRIE or self.TIME_TRIE, 3805 ) 3806 ), 3807 ) 3808 3809 if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime): 3810 this.set("zone", fmt.args["zone"]) 3811 3812 return this 3813 3814 return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, format=fmt) 3815 3816 def _parse_concat(self) -> t.Optional[exp.Expression]: 3817 args = self._parse_csv(self._parse_conjunction) 3818 if self.CONCAT_NULL_OUTPUTS_STRING: 3819 args = [ 3820 exp.func("COALESCE", exp.cast(arg, "text"), exp.Literal.string("")) 3821 for arg in args 3822 if arg 3823 ] 3824 3825 # Some dialects (e.g. Trino) don't allow a single-argument CONCAT call, so when 3826 # we find such a call we replace it with its argument. 
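# For example, CONCAT(a) is parsed as just the expression a (possibly already
# wrapped in COALESCE above), while CONCAT(a, b) still yields a Concat or
# SafeConcat node below.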
3827 if len(args) == 1: 3828 return args[0] 3829 3830 return self.expression( 3831 exp.Concat if self.STRICT_STRING_CONCAT else exp.SafeConcat, expressions=args 3832 ) 3833 3834 def _parse_string_agg(self) -> exp.Expression: 3835 if self._match(TokenType.DISTINCT): 3836 args: t.List[t.Optional[exp.Expression]] = [ 3837 self.expression(exp.Distinct, expressions=[self._parse_conjunction()]) 3838 ] 3839 if self._match(TokenType.COMMA): 3840 args.extend(self._parse_csv(self._parse_conjunction)) 3841 else: 3842 args = self._parse_csv(self._parse_conjunction) 3843 3844 index = self._index 3845 if not self._match(TokenType.R_PAREN): 3846 # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]]) 3847 return self.expression( 3848 exp.GroupConcat, 3849 this=seq_get(args, 0), 3850 separator=self._parse_order(this=seq_get(args, 1)), 3851 ) 3852 3853 # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]). 3854 # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that 3855 # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them. 3856 if not self._match_text_seq("WITHIN", "GROUP"): 3857 self._retreat(index) 3858 return self.validate_expression(exp.GroupConcat.from_arg_list(args), args) 3859 3860 self._match_l_paren() # The corresponding match_r_paren will be called in parse_function (caller) 3861 order = self._parse_order(this=seq_get(args, 0)) 3862 return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1)) 3863 3864 def _parse_convert(self, strict: bool) -> t.Optional[exp.Expression]: 3865 this = self._parse_bitwise() 3866 3867 if self._match(TokenType.USING): 3868 to: t.Optional[exp.Expression] = self.expression( 3869 exp.CharacterSet, this=self._parse_var() 3870 ) 3871 elif self._match(TokenType.COMMA): 3872 to = self._parse_types() 3873 else: 3874 to = None 3875 3876 return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to) 3877 3878 def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]: 3879 """ 3880 There are generally two variants of the DECODE function: 3881 3882 - DECODE(bin, charset) 3883 - DECODE(expression, search, result [, search, result] ... [, default]) 3884 3885 The second variant will always be parsed into a CASE expression. Note that NULL 3886 needs special treatment, since we need to explicitly check for it with `IS NULL`, 3887 instead of relying on pattern matching. 
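For example (illustrative), DECODE(x, 1, 'one', NULL, 'none', 'other') is parsed as the equivalent of CASE WHEN x = 1 THEN 'one' WHEN x IS NULL THEN 'none' ELSE 'other' END.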
3888 """ 3889 args = self._parse_csv(self._parse_conjunction) 3890 3891 if len(args) < 3: 3892 return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1)) 3893 3894 expression, *expressions = args 3895 if not expression: 3896 return None 3897 3898 ifs = [] 3899 for search, result in zip(expressions[::2], expressions[1::2]): 3900 if not search or not result: 3901 return None 3902 3903 if isinstance(search, exp.Literal): 3904 ifs.append( 3905 exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result) 3906 ) 3907 elif isinstance(search, exp.Null): 3908 ifs.append( 3909 exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result) 3910 ) 3911 else: 3912 cond = exp.or_( 3913 exp.EQ(this=expression.copy(), expression=search), 3914 exp.and_( 3915 exp.Is(this=expression.copy(), expression=exp.Null()), 3916 exp.Is(this=search.copy(), expression=exp.Null()), 3917 copy=False, 3918 ), 3919 copy=False, 3920 ) 3921 ifs.append(exp.If(this=cond, true=result)) 3922 3923 return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None) 3924 3925 def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]: 3926 self._match_text_seq("KEY") 3927 key = self._parse_field() 3928 self._match(TokenType.COLON) 3929 self._match_text_seq("VALUE") 3930 value = self._parse_field() 3931 3932 if not key and not value: 3933 return None 3934 return self.expression(exp.JSONKeyValue, this=key, expression=value) 3935 3936 def _parse_json_object(self) -> exp.JSONObject: 3937 star = self._parse_star() 3938 expressions = [star] if star else self._parse_csv(self._parse_json_key_value) 3939 3940 null_handling = None 3941 if self._match_text_seq("NULL", "ON", "NULL"): 3942 null_handling = "NULL ON NULL" 3943 elif self._match_text_seq("ABSENT", "ON", "NULL"): 3944 null_handling = "ABSENT ON NULL" 3945 3946 unique_keys = None 3947 if self._match_text_seq("WITH", "UNIQUE"): 3948 unique_keys = True 3949 elif self._match_text_seq("WITHOUT", "UNIQUE"): 3950 unique_keys = False 3951 3952 self._match_text_seq("KEYS") 3953 3954 return_type = self._match_text_seq("RETURNING") and self._parse_type() 3955 format_json = self._match_text_seq("FORMAT", "JSON") 3956 encoding = self._match_text_seq("ENCODING") and self._parse_var() 3957 3958 return self.expression( 3959 exp.JSONObject, 3960 expressions=expressions, 3961 null_handling=null_handling, 3962 unique_keys=unique_keys, 3963 return_type=return_type, 3964 format_json=format_json, 3965 encoding=encoding, 3966 ) 3967 3968 def _parse_logarithm(self) -> exp.Func: 3969 # Default argument order is base, expression 3970 args = self._parse_csv(self._parse_range) 3971 3972 if len(args) > 1: 3973 if not self.LOG_BASE_FIRST: 3974 args.reverse() 3975 return exp.Log.from_arg_list(args) 3976 3977 return self.expression( 3978 exp.Ln if self.LOG_DEFAULTS_TO_LN else exp.Log, this=seq_get(args, 0) 3979 ) 3980 3981 def _parse_match_against(self) -> exp.MatchAgainst: 3982 expressions = self._parse_csv(self._parse_column) 3983 3984 self._match_text_seq(")", "AGAINST", "(") 3985 3986 this = self._parse_string() 3987 3988 if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"): 3989 modifier = "IN NATURAL LANGUAGE MODE" 3990 if self._match_text_seq("WITH", "QUERY", "EXPANSION"): 3991 modifier = f"{modifier} WITH QUERY EXPANSION" 3992 elif self._match_text_seq("IN", "BOOLEAN", "MODE"): 3993 modifier = "IN BOOLEAN MODE" 3994 elif self._match_text_seq("WITH", "QUERY", "EXPANSION"): 3995 modifier = "WITH QUERY EXPANSION" 3996 
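# No recognized modifier follows the search string (MySQL then defaults to
# natural language mode).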
else: 3997 modifier = None 3998 3999 return self.expression( 4000 exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier 4001 ) 4002 4003 # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16 4004 def _parse_open_json(self) -> exp.OpenJSON: 4005 this = self._parse_bitwise() 4006 path = self._match(TokenType.COMMA) and self._parse_string() 4007 4008 def _parse_open_json_column_def() -> exp.OpenJSONColumnDef: 4009 this = self._parse_field(any_token=True) 4010 kind = self._parse_types() 4011 path = self._parse_string() 4012 as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON) 4013 4014 return self.expression( 4015 exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json 4016 ) 4017 4018 expressions = None 4019 if self._match_pair(TokenType.R_PAREN, TokenType.WITH): 4020 self._match_l_paren() 4021 expressions = self._parse_csv(_parse_open_json_column_def) 4022 4023 return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions) 4024 4025 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 4026 args = self._parse_csv(self._parse_bitwise) 4027 4028 if self._match(TokenType.IN): 4029 return self.expression( 4030 exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0) 4031 ) 4032 4033 if haystack_first: 4034 haystack = seq_get(args, 0) 4035 needle = seq_get(args, 1) 4036 else: 4037 needle = seq_get(args, 0) 4038 haystack = seq_get(args, 1) 4039 4040 return self.expression( 4041 exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2) 4042 ) 4043 4044 def _parse_join_hint(self, func_name: str) -> exp.JoinHint: 4045 args = self._parse_csv(self._parse_table) 4046 return exp.JoinHint(this=func_name.upper(), expressions=args) 4047 4048 def _parse_substring(self) -> exp.Substring: 4049 # Postgres supports the form: substring(string [from int] [for int]) 4050 # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6 4051 4052 args = self._parse_csv(self._parse_bitwise) 4053 4054 if self._match(TokenType.FROM): 4055 args.append(self._parse_bitwise()) 4056 if self._match(TokenType.FOR): 4057 args.append(self._parse_bitwise()) 4058 4059 return self.validate_expression(exp.Substring.from_arg_list(args), args) 4060 4061 def _parse_trim(self) -> exp.Trim: 4062 # https://www.w3resource.com/sql/character-functions/trim.php 4063 # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html 4064 4065 position = None 4066 collation = None 4067 4068 if self._match_texts(self.TRIM_TYPES): 4069 position = self._prev.text.upper() 4070 4071 expression = self._parse_bitwise() 4072 if self._match_set((TokenType.FROM, TokenType.COMMA)): 4073 this = self._parse_bitwise() 4074 else: 4075 this = expression 4076 expression = None 4077 4078 if self._match(TokenType.COLLATE): 4079 collation = self._parse_bitwise() 4080 4081 return self.expression( 4082 exp.Trim, this=this, position=position, expression=expression, collation=collation 4083 ) 4084 4085 def _parse_window_clause(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]: 4086 return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window) 4087 4088 def _parse_named_window(self) -> t.Optional[exp.Expression]: 4089 return self._parse_window(self._parse_id_var(), alias=True) 4090 4091 def _parse_respect_or_ignore_nulls( 4092 self, this: t.Optional[exp.Expression] 4093 ) -> t.Optional[exp.Expression]: 4094 if self._match_text_seq("IGNORE", "NULLS"): 4095 return 
self.expression(exp.IgnoreNulls, this=this) 4096 if self._match_text_seq("RESPECT", "NULLS"): 4097 return self.expression(exp.RespectNulls, this=this) 4098 return this 4099 4100 def _parse_window( 4101 self, this: t.Optional[exp.Expression], alias: bool = False 4102 ) -> t.Optional[exp.Expression]: 4103 if self._match_pair(TokenType.FILTER, TokenType.L_PAREN): 4104 self._match(TokenType.WHERE) 4105 this = self.expression( 4106 exp.Filter, this=this, expression=self._parse_where(skip_where_token=True) 4107 ) 4108 self._match_r_paren() 4109 4110 # T-SQL allows the OVER (...) syntax after WITHIN GROUP. 4111 # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16 4112 if self._match_text_seq("WITHIN", "GROUP"): 4113 order = self._parse_wrapped(self._parse_order) 4114 this = self.expression(exp.WithinGroup, this=this, expression=order) 4115 4116 # The SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] clause before OVER. 4117 # Some dialects choose to implement it and some do not. 4118 # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html 4119 4120 # There is some code above in _parse_lambda that handles 4121 # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ... 4122 4123 # The code below handles 4124 # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ... 4125 4126 # Oracle allows both formats 4127 # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html) 4128 # and Snowflake chose to do the same for familiarity 4129 # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes 4130 this = self._parse_respect_or_ignore_nulls(this) 4131 4132 # BigQuery named windows: SELECT ... FROM t WINDOW x AS (PARTITION BY ...) 4133 if alias: 4134 over = None 4135 self._match(TokenType.ALIAS) 4136 elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS): 4137 return this 4138 else: 4139 over = self._prev.text.upper() 4140 4141 if not self._match(TokenType.L_PAREN): 4142 return self.expression( 4143 exp.Window, this=this, alias=self._parse_id_var(False), over=over 4144 ) 4145 4146 window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS) 4147 4148 first = self._match(TokenType.FIRST) 4149 if self._match_text_seq("LAST"): 4150 first = False 4151 4152 partition = self._parse_partition_by() 4153 order = self._parse_order() 4154 kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text 4155 4156 if kind: 4157 self._match(TokenType.BETWEEN) 4158 start = self._parse_window_spec() 4159 self._match(TokenType.AND) 4160 end = self._parse_window_spec() 4161 4162 spec = self.expression( 4163 exp.WindowSpec, 4164 kind=kind, 4165 start=start["value"], 4166 start_side=start["side"], 4167 end=end["value"], 4168 end_side=end["side"], 4169 ) 4170 else: 4171 spec = None 4172 4173 self._match_r_paren() 4174 4175 return self.expression( 4176 exp.Window, 4177 this=this, 4178 partition_by=partition, 4179 order=order, 4180 spec=spec, 4181 alias=window_alias, 4182 over=over, 4183 first=first, 4184 ) 4185 4186 def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]: 4187 self._match(TokenType.BETWEEN) 4188 4189 return { 4190 "value": ( 4191 (self._match_text_seq("UNBOUNDED") and "UNBOUNDED") 4192 or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW") 4193 or self._parse_bitwise() 4194 ), 4195 "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text, 4196 } 4197 4198 def _parse_alias( 4199 self, this: t.Optional[exp.Expression],
explicit: bool = False 4200 ) -> t.Optional[exp.Expression]: 4201 any_token = self._match(TokenType.ALIAS) 4202 4203 if explicit and not any_token: 4204 return this 4205 4206 if self._match(TokenType.L_PAREN): 4207 aliases = self.expression( 4208 exp.Aliases, 4209 this=this, 4210 expressions=self._parse_csv(lambda: self._parse_id_var(any_token)), 4211 ) 4212 self._match_r_paren(aliases) 4213 return aliases 4214 4215 alias = self._parse_id_var(any_token) 4216 4217 if alias: 4218 return self.expression(exp.Alias, this=this, alias=alias) 4219 4220 return this 4221 4222 def _parse_id_var( 4223 self, 4224 any_token: bool = True, 4225 tokens: t.Optional[t.Collection[TokenType]] = None, 4226 ) -> t.Optional[exp.Expression]: 4227 identifier = self._parse_identifier() 4228 4229 if identifier: 4230 return identifier 4231 4232 if (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS): 4233 quoted = self._prev.token_type == TokenType.STRING 4234 return exp.Identifier(this=self._prev.text, quoted=quoted) 4235 4236 return None 4237 4238 def _parse_string(self) -> t.Optional[exp.Expression]: 4239 if self._match(TokenType.STRING): 4240 return self.PRIMARY_PARSERS[TokenType.STRING](self, self._prev) 4241 return self._parse_placeholder() 4242 4243 def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]: 4244 return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True) 4245 4246 def _parse_number(self) -> t.Optional[exp.Expression]: 4247 if self._match(TokenType.NUMBER): 4248 return self.PRIMARY_PARSERS[TokenType.NUMBER](self, self._prev) 4249 return self._parse_placeholder() 4250 4251 def _parse_identifier(self) -> t.Optional[exp.Expression]: 4252 if self._match(TokenType.IDENTIFIER): 4253 return self.expression(exp.Identifier, this=self._prev.text, quoted=True) 4254 return self._parse_placeholder() 4255 4256 def _parse_var( 4257 self, any_token: bool = False, tokens: t.Optional[t.Collection[TokenType]] = None 4258 ) -> t.Optional[exp.Expression]: 4259 if ( 4260 (any_token and self._advance_any()) 4261 or self._match(TokenType.VAR) 4262 or (self._match_set(tokens) if tokens else False) 4263 ): 4264 return self.expression(exp.Var, this=self._prev.text) 4265 return self._parse_placeholder() 4266 4267 def _advance_any(self) -> t.Optional[Token]: 4268 if self._curr and self._curr.token_type not in self.RESERVED_KEYWORDS: 4269 self._advance() 4270 return self._prev 4271 return None 4272 4273 def _parse_var_or_string(self) -> t.Optional[exp.Expression]: 4274 return self._parse_var() or self._parse_string() 4275 4276 def _parse_null(self) -> t.Optional[exp.Expression]: 4277 if self._match(TokenType.NULL): 4278 return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev) 4279 return None 4280 4281 def _parse_boolean(self) -> t.Optional[exp.Expression]: 4282 if self._match(TokenType.TRUE): 4283 return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev) 4284 if self._match(TokenType.FALSE): 4285 return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev) 4286 return None 4287 4288 def _parse_star(self) -> t.Optional[exp.Expression]: 4289 if self._match(TokenType.STAR): 4290 return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev) 4291 return None 4292 4293 def _parse_parameter(self) -> exp.Parameter: 4294 wrapped = self._match(TokenType.L_BRACE) 4295 this = self._parse_var() or self._parse_identifier() or self._parse_primary() 4296 self._match(TokenType.R_BRACE) 4297 return self.expression(exp.Parameter, this=this, wrapped=wrapped) 4298 
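# The placeholder machinery below dispatches on PLACEHOLDER_PARSERS; e.g.
# TokenType.PARAMETER is routed to _parse_parameter above, and a candidate
# that fails to parse rewinds one token.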
4299 def _parse_placeholder(self) -> t.Optional[exp.Expression]: 4300 if self._match_set(self.PLACEHOLDER_PARSERS): 4301 placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self) 4302 if placeholder: 4303 return placeholder 4304 self._advance(-1) 4305 return None 4306 4307 def _parse_except(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]: 4308 if not self._match(TokenType.EXCEPT): 4309 return None 4310 if self._match(TokenType.L_PAREN, advance=False): 4311 return self._parse_wrapped_csv(self._parse_column) 4312 return self._parse_csv(self._parse_column) 4313 4314 def _parse_replace(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]: 4315 if not self._match(TokenType.REPLACE): 4316 return None 4317 if self._match(TokenType.L_PAREN, advance=False): 4318 return self._parse_wrapped_csv(self._parse_expression) 4319 return self._parse_expressions() 4320 4321 def _parse_csv( 4322 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA 4323 ) -> t.List[t.Optional[exp.Expression]]: 4324 parse_result = parse_method() 4325 items = [parse_result] if parse_result is not None else [] 4326 4327 while self._match(sep): 4328 self._add_comments(parse_result) 4329 parse_result = parse_method() 4330 if parse_result is not None: 4331 items.append(parse_result) 4332 4333 return items 4334 4335 def _parse_tokens( 4336 self, parse_method: t.Callable, expressions: t.Dict 4337 ) -> t.Optional[exp.Expression]: 4338 this = parse_method() 4339 4340 while self._match_set(expressions): 4341 this = self.expression( 4342 expressions[self._prev.token_type], 4343 this=this, 4344 comments=self._prev_comments, 4345 expression=parse_method(), 4346 ) 4347 4348 return this 4349 4350 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[t.Optional[exp.Expression]]: 4351 return self._parse_wrapped_csv(self._parse_id_var, optional=optional) 4352 4353 def _parse_wrapped_csv( 4354 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False 4355 ) -> t.List[t.Optional[exp.Expression]]: 4356 return self._parse_wrapped( 4357 lambda: self._parse_csv(parse_method, sep=sep), optional=optional 4358 ) 4359 4360 def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any: 4361 wrapped = self._match(TokenType.L_PAREN) 4362 if not wrapped and not optional: 4363 self.raise_error("Expecting (") 4364 parse_result = parse_method() 4365 if wrapped: 4366 self._match_r_paren() 4367 return parse_result 4368 4369 def _parse_expressions(self) -> t.List[t.Optional[exp.Expression]]: 4370 return self._parse_csv(self._parse_expression) 4371 4372 def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]: 4373 return self._parse_select() or self._parse_set_operations( 4374 self._parse_expression() if alias else self._parse_conjunction() 4375 ) 4376 4377 def _parse_ddl_select(self) -> t.Optional[exp.Expression]: 4378 return self._parse_query_modifiers( 4379 self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False)) 4380 ) 4381 4382 def _parse_transaction(self) -> exp.Transaction | exp.Command: 4383 this = None 4384 if self._match_texts(self.TRANSACTION_KIND): 4385 this = self._prev.text 4386 4387 self._match_texts({"TRANSACTION", "WORK"}) 4388 4389 modes = [] 4390 while True: 4391 mode = [] 4392 while self._match(TokenType.VAR): 4393 mode.append(self._prev.text) 4394 4395 if mode: 4396 modes.append(" ".join(mode)) 4397 if not self._match(TokenType.COMMA): 4398 break 4399 4400 return 
self.expression(exp.Transaction, this=this, modes=modes) 4401 4402 def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback: 4403 chain = None 4404 savepoint = None 4405 is_rollback = self._prev.token_type == TokenType.ROLLBACK 4406 4407 self._match_texts({"TRANSACTION", "WORK"}) 4408 4409 if self._match_text_seq("TO"): 4410 self._match_text_seq("SAVEPOINT") 4411 savepoint = self._parse_id_var() 4412 4413 if self._match(TokenType.AND): 4414 chain = not self._match_text_seq("NO") 4415 self._match_text_seq("CHAIN") 4416 4417 if is_rollback: 4418 return self.expression(exp.Rollback, savepoint=savepoint) 4419 4420 return self.expression(exp.Commit, chain=chain) 4421 4422 def _parse_add_column(self) -> t.Optional[exp.Expression]: 4423 if not self._match_text_seq("ADD"): 4424 return None 4425 4426 self._match(TokenType.COLUMN) 4427 exists_column = self._parse_exists(not_=True) 4428 expression = self._parse_column_def(self._parse_field(any_token=True)) 4429 4430 if expression: 4431 expression.set("exists", exists_column) 4432 4433 # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns 4434 if self._match_texts(("FIRST", "AFTER")): 4435 position = self._prev.text 4436 column_position = self.expression( 4437 exp.ColumnPosition, this=self._parse_column(), position=position 4438 ) 4439 expression.set("position", column_position) 4440 4441 return expression 4442 4443 def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]: 4444 drop = self._match(TokenType.DROP) and self._parse_drop() 4445 if drop and not isinstance(drop, exp.Command): 4446 drop.set("kind", drop.args.get("kind", "COLUMN")) 4447 return drop 4448 4449 # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html 4450 def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition: 4451 return self.expression( 4452 exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists 4453 ) 4454 4455 def _parse_add_constraint(self) -> exp.AddConstraint: 4456 this = None 4457 kind = self._prev.token_type 4458 4459 if kind == TokenType.CONSTRAINT: 4460 this = self._parse_id_var() 4461 4462 if self._match_text_seq("CHECK"): 4463 expression = self._parse_wrapped(self._parse_conjunction) 4464 enforced = self._match_text_seq("ENFORCED") 4465 4466 return self.expression( 4467 exp.AddConstraint, this=this, expression=expression, enforced=enforced 4468 ) 4469 4470 if kind == TokenType.FOREIGN_KEY or self._match(TokenType.FOREIGN_KEY): 4471 expression = self._parse_foreign_key() 4472 elif kind == TokenType.PRIMARY_KEY or self._match(TokenType.PRIMARY_KEY): 4473 expression = self._parse_primary_key() 4474 else: 4475 expression = None 4476 4477 return self.expression(exp.AddConstraint, this=this, expression=expression) 4478 4479 def _parse_alter_table_add(self) -> t.List[t.Optional[exp.Expression]]: 4480 index = self._index - 1 4481 4482 if self._match_set(self.ADD_CONSTRAINT_TOKENS): 4483 return self._parse_csv(self._parse_add_constraint) 4484 4485 self._retreat(index) 4486 return self._parse_csv(self._parse_add_column) 4487 4488 def _parse_alter_table_alter(self) -> exp.AlterColumn: 4489 self._match(TokenType.COLUMN) 4490 column = self._parse_field(any_token=True) 4491 4492 if self._match_pair(TokenType.DROP, TokenType.DEFAULT): 4493 return self.expression(exp.AlterColumn, this=column, drop=True) 4494 if self._match_pair(TokenType.SET, TokenType.DEFAULT): 4495 return self.expression(exp.AlterColumn, this=column, 
default=self._parse_conjunction()) 4496 4497 self._match_text_seq("SET", "DATA") 4498 return self.expression( 4499 exp.AlterColumn, 4500 this=column, 4501 dtype=self._match_text_seq("TYPE") and self._parse_types(), 4502 collate=self._match(TokenType.COLLATE) and self._parse_term(), 4503 using=self._match(TokenType.USING) and self._parse_conjunction(), 4504 ) 4505 4506 def _parse_alter_table_drop(self) -> t.List[t.Optional[exp.Expression]]: 4507 index = self._index - 1 4508 4509 partition_exists = self._parse_exists() 4510 if self._match(TokenType.PARTITION, advance=False): 4511 return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists)) 4512 4513 self._retreat(index) 4514 return self._parse_csv(self._parse_drop_column) 4515 4516 def _parse_alter_table_rename(self) -> exp.RenameTable: 4517 self._match_text_seq("TO") 4518 return self.expression(exp.RenameTable, this=self._parse_table(schema=True)) 4519 4520 def _parse_alter(self) -> exp.AlterTable | exp.Command: 4521 start = self._prev 4522 4523 if not self._match(TokenType.TABLE): 4524 return self._parse_as_command(start) 4525 4526 exists = self._parse_exists() 4527 this = self._parse_table(schema=True) 4528 4529 if self._next: 4530 self._advance() 4531 4532 parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None 4533 if parser: 4534 actions = ensure_list(parser(self)) 4535 4536 if not self._curr: 4537 return self.expression( 4538 exp.AlterTable, 4539 this=this, 4540 exists=exists, 4541 actions=actions, 4542 ) 4543 return self._parse_as_command(start) 4544 4545 def _parse_merge(self) -> exp.Merge: 4546 self._match(TokenType.INTO) 4547 target = self._parse_table() 4548 4549 self._match(TokenType.USING) 4550 using = self._parse_table() 4551 4552 self._match(TokenType.ON) 4553 on = self._parse_conjunction() 4554 4555 whens = [] 4556 while self._match(TokenType.WHEN): 4557 matched = not self._match(TokenType.NOT) 4558 self._match_text_seq("MATCHED") 4559 source = ( 4560 False 4561 if self._match_text_seq("BY", "TARGET") 4562 else self._match_text_seq("BY", "SOURCE") 4563 ) 4564 condition = self._parse_conjunction() if self._match(TokenType.AND) else None 4565 4566 self._match(TokenType.THEN) 4567 4568 if self._match(TokenType.INSERT): 4569 _this = self._parse_star() 4570 if _this: 4571 then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=_this) 4572 else: 4573 then = self.expression( 4574 exp.Insert, 4575 this=self._parse_value(), 4576 expression=self._match(TokenType.VALUES) and self._parse_value(), 4577 ) 4578 elif self._match(TokenType.UPDATE): 4579 expressions = self._parse_star() 4580 if expressions: 4581 then = self.expression(exp.Update, expressions=expressions) 4582 else: 4583 then = self.expression( 4584 exp.Update, 4585 expressions=self._match(TokenType.SET) 4586 and self._parse_csv(self._parse_equality), 4587 ) 4588 elif self._match(TokenType.DELETE): 4589 then = self.expression(exp.Var, this=self._prev.text) 4590 else: 4591 then = None 4592 4593 whens.append( 4594 self.expression( 4595 exp.When, 4596 matched=matched, 4597 source=source, 4598 condition=condition, 4599 then=then, 4600 ) 4601 ) 4602 4603 return self.expression( 4604 exp.Merge, 4605 this=target, 4606 using=using, 4607 on=on, 4608 expressions=whens, 4609 ) 4610 4611 def _parse_show(self) -> t.Optional[exp.Expression]: 4612 parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE) 4613 if parser: 4614 return parser(self) 4615 self._advance() 4616 return self.expression(exp.Show, 
this=self._prev.text.upper()) 4617 4618 def _parse_set_item_assignment( 4619 self, kind: t.Optional[str] = None 4620 ) -> t.Optional[exp.Expression]: 4621 index = self._index 4622 4623 if kind in {"GLOBAL", "SESSION"} and self._match_text_seq("TRANSACTION"): 4624 return self._parse_set_transaction(global_=kind == "GLOBAL") 4625 4626 left = self._parse_primary() or self._parse_id_var() 4627 4628 if not self._match_texts(("=", "TO")): 4629 self._retreat(index) 4630 return None 4631 4632 right = self._parse_statement() or self._parse_id_var() 4633 this = self.expression(exp.EQ, this=left, expression=right) 4634 4635 return self.expression(exp.SetItem, this=this, kind=kind) 4636 4637 def _parse_set_transaction(self, global_: bool = False) -> exp.Expression: 4638 self._match_text_seq("TRANSACTION") 4639 characteristics = self._parse_csv( 4640 lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS) 4641 ) 4642 return self.expression( 4643 exp.SetItem, 4644 expressions=characteristics, 4645 kind="TRANSACTION", 4646 **{"global": global_}, # type: ignore 4647 ) 4648 4649 def _parse_set_item(self) -> t.Optional[exp.Expression]: 4650 parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE) 4651 return parser(self) if parser else self._parse_set_item_assignment(kind=None) 4652 4653 def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command: 4654 index = self._index 4655 set_ = self.expression( 4656 exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag 4657 ) 4658 4659 if self._curr: 4660 self._retreat(index) 4661 return self._parse_as_command(self._prev) 4662 4663 return set_ 4664 4665 def _parse_var_from_options(self, options: t.Collection[str]) -> t.Optional[exp.Var]: 4666 for option in options: 4667 if self._match_text_seq(*option.split(" ")): 4668 return exp.var(option) 4669 return None 4670 4671 def _parse_as_command(self, start: Token) -> exp.Command: 4672 while self._curr: 4673 self._advance() 4674 text = self._find_sql(start, self._prev) 4675 size = len(start.text) 4676 return exp.Command(this=text[:size], expression=text[size:]) 4677 4678 def _parse_dict_property(self, this: str) -> exp.DictProperty: 4679 settings = [] 4680 4681 self._match_l_paren() 4682 kind = self._parse_id_var() 4683 4684 if self._match(TokenType.L_PAREN): 4685 while True: 4686 key = self._parse_id_var() 4687 value = self._parse_primary() 4688 4689 if not key and value is None: 4690 break 4691 settings.append(self.expression(exp.DictSubProperty, this=key, value=value)) 4692 self._match(TokenType.R_PAREN) 4693 4694 self._match_r_paren() 4695 4696 return self.expression( 4697 exp.DictProperty, 4698 this=this, 4699 kind=kind.this if kind else None, 4700 settings=settings, 4701 ) 4702 4703 def _parse_dict_range(self, this: str) -> exp.DictRange: 4704 self._match_l_paren() 4705 has_min = self._match_text_seq("MIN") 4706 if has_min: 4707 min = self._parse_var() or self._parse_primary() 4708 self._match_text_seq("MAX") 4709 max = self._parse_var() or self._parse_primary() 4710 else: 4711 max = self._parse_var() or self._parse_primary() 4712 min = exp.Literal.number(0) 4713 self._match_r_paren() 4714 return self.expression(exp.DictRange, this=this, min=min, max=max) 4715 4716 def _find_parser( 4717 self, parsers: t.Dict[str, t.Callable], trie: t.Dict 4718 ) -> t.Optional[t.Callable]: 4719 if not self._curr: 4720 return None 4721 4722 index = self._index 4723 this = [] 4724 while True: 4725 # The current token might be multiple words 4726 curr = 
self._curr.text.upper() 4727 key = curr.split(" ") 4728 this.append(curr) 4729 4730 self._advance() 4731 result, trie = in_trie(trie, key) 4732 if result == TrieResult.FAILED: 4733 break 4734 4735 if result == TrieResult.EXISTS: 4736 subparser = parsers[" ".join(this)] 4737 return subparser 4738 4739 self._retreat(index) 4740 return None 4741 4742 def _match(self, token_type, advance=True, expression=None): 4743 if not self._curr: 4744 return None 4745 4746 if self._curr.token_type == token_type: 4747 if advance: 4748 self._advance() 4749 self._add_comments(expression) 4750 return True 4751 4752 return None 4753 4754 def _match_set(self, types, advance=True): 4755 if not self._curr: 4756 return None 4757 4758 if self._curr.token_type in types: 4759 if advance: 4760 self._advance() 4761 return True 4762 4763 return None 4764 4765 def _match_pair(self, token_type_a, token_type_b, advance=True): 4766 if not self._curr or not self._next: 4767 return None 4768 4769 if self._curr.token_type == token_type_a and self._next.token_type == token_type_b: 4770 if advance: 4771 self._advance(2) 4772 return True 4773 4774 return None 4775 4776 def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 4777 if not self._match(TokenType.L_PAREN, expression=expression): 4778 self.raise_error("Expecting (") 4779 4780 def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 4781 if not self._match(TokenType.R_PAREN, expression=expression): 4782 self.raise_error("Expecting )") 4783 4784 def _match_texts(self, texts, advance=True): 4785 if self._curr and self._curr.text.upper() in texts: 4786 if advance: 4787 self._advance() 4788 return True 4789 return False 4790 4791 def _match_text_seq(self, *texts, advance=True): 4792 index = self._index 4793 for text in texts: 4794 if self._curr and self._curr.text.upper() == text: 4795 self._advance() 4796 else: 4797 self._retreat(index) 4798 return False 4799 4800 if not advance: 4801 self._retreat(index) 4802 4803 return True 4804 4805 @t.overload 4806 def _replace_columns_with_dots(self, this: exp.Expression) -> exp.Expression: 4807 ... 4808 4809 @t.overload 4810 def _replace_columns_with_dots( 4811 self, this: t.Optional[exp.Expression] 4812 ) -> t.Optional[exp.Expression]: 4813 ... 4814 4815 def _replace_columns_with_dots(self, this): 4816 if isinstance(this, exp.Dot): 4817 exp.replace_children(this, self._replace_columns_with_dots) 4818 elif isinstance(this, exp.Column): 4819 exp.replace_children(this, self._replace_columns_with_dots) 4820 table = this.args.get("table") 4821 this = ( 4822 self.expression(exp.Dot, this=table, expression=this.this) if table else this.this 4823 ) 4824 4825 return this 4826 4827 def _replace_lambda( 4828 self, node: t.Optional[exp.Expression], lambda_variables: t.Set[str] 4829 ) -> t.Optional[exp.Expression]: 4830 if not node: 4831 return node 4832 4833 for column in node.find_all(exp.Column): 4834 if column.parts[0].name in lambda_variables: 4835 dot_or_id = column.to_dot() if column.table else column.this 4836 parent = column.parent 4837 4838 while isinstance(parent, exp.Dot): 4839 if not isinstance(parent.parent, exp.Dot): 4840 parent.replace(dot_or_id) 4841 break 4842 parent = parent.parent 4843 else: 4844 if column is node: 4845 node = dot_or_id 4846 else: 4847 column.replace(dot_or_id) 4848 return node
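For reference, a minimal end-to-end sketch of driving this parser directly (not part of the module source; the top-level sqlglot.parse_one helper wraps roughly this flow):

from sqlglot import exp
from sqlglot.errors import ErrorLevel
from sqlglot.parser import Parser
from sqlglot.tokens import Tokenizer

# Tokenize, then parse; Parser.parse returns one expression tree per statement.
tokens = Tokenizer().tokenize("SELECT a, b FROM t WHERE a > 1")
statements = Parser(error_level=ErrorLevel.RAISE).parse(tokens)

select = statements[0]
assert isinstance(select, exp.Select)
print(select.sql())  # SELECT a, b FROM t WHERE a > 1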
21def parse_var_map(args: t.List) -> exp.StarMap | exp.VarMap: 22 if len(args) == 1 and args[0].is_star: 23 return exp.StarMap(this=args[0]) 24 25 keys = [] 26 values = [] 27 for i in range(0, len(args), 2): 28 keys.append(args[i]) 29 values.append(args[i + 1]) 30 31 return exp.VarMap( 32 keys=exp.Array(expressions=keys), 33 values=exp.Array(expressions=values), 34 )
60class Parser(metaclass=_Parser): 61 """ 62 Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree. 63 64 Args: 65 error_level: The desired error level. 66 Default: ErrorLevel.IMMEDIATE 67 error_message_context: Determines the amount of context to capture from a 68 query string when displaying the error message (in number of characters). 69 Default: 100 70 max_errors: Maximum number of error messages to include in a raised ParseError. 71 This is only relevant if error_level is ErrorLevel.RAISE. 72 Default: 3 73 """ 74 75 FUNCTIONS: t.Dict[str, t.Callable] = { 76 **{name: f.from_arg_list for f in exp.ALL_FUNCTIONS for name in f.sql_names()}, 77 "DATE_TO_DATE_STR": lambda args: exp.Cast( 78 this=seq_get(args, 0), 79 to=exp.DataType(this=exp.DataType.Type.TEXT), 80 ), 81 "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)), 82 "LIKE": parse_like, 83 "TIME_TO_TIME_STR": lambda args: exp.Cast( 84 this=seq_get(args, 0), 85 to=exp.DataType(this=exp.DataType.Type.TEXT), 86 ), 87 "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring( 88 this=exp.Cast( 89 this=seq_get(args, 0), 90 to=exp.DataType(this=exp.DataType.Type.TEXT), 91 ), 92 start=exp.Literal.number(1), 93 length=exp.Literal.number(10), 94 ), 95 "VAR_MAP": parse_var_map, 96 } 97 98 NO_PAREN_FUNCTIONS = { 99 TokenType.CURRENT_DATE: exp.CurrentDate, 100 TokenType.CURRENT_DATETIME: exp.CurrentDate, 101 TokenType.CURRENT_TIME: exp.CurrentTime, 102 TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp, 103 TokenType.CURRENT_USER: exp.CurrentUser, 104 } 105 106 NESTED_TYPE_TOKENS = { 107 TokenType.ARRAY, 108 TokenType.MAP, 109 TokenType.NULLABLE, 110 TokenType.STRUCT, 111 } 112 113 ENUM_TYPE_TOKENS = { 114 TokenType.ENUM, 115 } 116 117 TYPE_TOKENS = { 118 TokenType.BIT, 119 TokenType.BOOLEAN, 120 TokenType.TINYINT, 121 TokenType.UTINYINT, 122 TokenType.SMALLINT, 123 TokenType.USMALLINT, 124 TokenType.INT, 125 TokenType.UINT, 126 TokenType.BIGINT, 127 TokenType.UBIGINT, 128 TokenType.INT128, 129 TokenType.UINT128, 130 TokenType.INT256, 131 TokenType.UINT256, 132 TokenType.FLOAT, 133 TokenType.DOUBLE, 134 TokenType.CHAR, 135 TokenType.NCHAR, 136 TokenType.VARCHAR, 137 TokenType.NVARCHAR, 138 TokenType.TEXT, 139 TokenType.MEDIUMTEXT, 140 TokenType.LONGTEXT, 141 TokenType.MEDIUMBLOB, 142 TokenType.LONGBLOB, 143 TokenType.BINARY, 144 TokenType.VARBINARY, 145 TokenType.JSON, 146 TokenType.JSONB, 147 TokenType.INTERVAL, 148 TokenType.TIME, 149 TokenType.TIMESTAMP, 150 TokenType.TIMESTAMPTZ, 151 TokenType.TIMESTAMPLTZ, 152 TokenType.DATETIME, 153 TokenType.DATETIME64, 154 TokenType.DATE, 155 TokenType.INT4RANGE, 156 TokenType.INT4MULTIRANGE, 157 TokenType.INT8RANGE, 158 TokenType.INT8MULTIRANGE, 159 TokenType.NUMRANGE, 160 TokenType.NUMMULTIRANGE, 161 TokenType.TSRANGE, 162 TokenType.TSMULTIRANGE, 163 TokenType.TSTZRANGE, 164 TokenType.TSTZMULTIRANGE, 165 TokenType.DATERANGE, 166 TokenType.DATEMULTIRANGE, 167 TokenType.DECIMAL, 168 TokenType.BIGDECIMAL, 169 TokenType.UUID, 170 TokenType.GEOGRAPHY, 171 TokenType.GEOMETRY, 172 TokenType.HLLSKETCH, 173 TokenType.HSTORE, 174 TokenType.PSEUDO_TYPE, 175 TokenType.SUPER, 176 TokenType.SERIAL, 177 TokenType.SMALLSERIAL, 178 TokenType.BIGSERIAL, 179 TokenType.XML, 180 TokenType.UNIQUEIDENTIFIER, 181 TokenType.USERDEFINED, 182 TokenType.MONEY, 183 TokenType.SMALLMONEY, 184 TokenType.ROWVERSION, 185 TokenType.IMAGE, 186 TokenType.VARIANT, 187 TokenType.OBJECT, 188 TokenType.INET, 189 TokenType.ENUM, 190 *NESTED_TYPE_TOKENS, 191 } 192 193 
SUBQUERY_PREDICATES = { 194 TokenType.ANY: exp.Any, 195 TokenType.ALL: exp.All, 196 TokenType.EXISTS: exp.Exists, 197 TokenType.SOME: exp.Any, 198 } 199 200 RESERVED_KEYWORDS = { 201 *Tokenizer.SINGLE_TOKENS.values(), 202 TokenType.SELECT, 203 } 204 205 DB_CREATABLES = { 206 TokenType.DATABASE, 207 TokenType.SCHEMA, 208 TokenType.TABLE, 209 TokenType.VIEW, 210 TokenType.DICTIONARY, 211 } 212 213 CREATABLES = { 214 TokenType.COLUMN, 215 TokenType.FUNCTION, 216 TokenType.INDEX, 217 TokenType.PROCEDURE, 218 *DB_CREATABLES, 219 } 220 221 # Tokens that can represent identifiers 222 ID_VAR_TOKENS = { 223 TokenType.VAR, 224 TokenType.ANTI, 225 TokenType.APPLY, 226 TokenType.ASC, 227 TokenType.AUTO_INCREMENT, 228 TokenType.BEGIN, 229 TokenType.CACHE, 230 TokenType.CASE, 231 TokenType.COLLATE, 232 TokenType.COMMAND, 233 TokenType.COMMENT, 234 TokenType.COMMIT, 235 TokenType.CONSTRAINT, 236 TokenType.DEFAULT, 237 TokenType.DELETE, 238 TokenType.DESC, 239 TokenType.DESCRIBE, 240 TokenType.DICTIONARY, 241 TokenType.DIV, 242 TokenType.END, 243 TokenType.EXECUTE, 244 TokenType.ESCAPE, 245 TokenType.FALSE, 246 TokenType.FIRST, 247 TokenType.FILTER, 248 TokenType.FORMAT, 249 TokenType.FULL, 250 TokenType.IF, 251 TokenType.IS, 252 TokenType.ISNULL, 253 TokenType.INTERVAL, 254 TokenType.KEEP, 255 TokenType.LEFT, 256 TokenType.LOAD, 257 TokenType.MERGE, 258 TokenType.NATURAL, 259 TokenType.NEXT, 260 TokenType.OFFSET, 261 TokenType.ORDINALITY, 262 TokenType.OVERWRITE, 263 TokenType.PARTITION, 264 TokenType.PERCENT, 265 TokenType.PIVOT, 266 TokenType.PRAGMA, 267 TokenType.RANGE, 268 TokenType.REFERENCES, 269 TokenType.RIGHT, 270 TokenType.ROW, 271 TokenType.ROWS, 272 TokenType.SEMI, 273 TokenType.SET, 274 TokenType.SETTINGS, 275 TokenType.SHOW, 276 TokenType.TEMPORARY, 277 TokenType.TOP, 278 TokenType.TRUE, 279 TokenType.UNIQUE, 280 TokenType.UNPIVOT, 281 TokenType.UPDATE, 282 TokenType.VOLATILE, 283 TokenType.WINDOW, 284 *CREATABLES, 285 *SUBQUERY_PREDICATES, 286 *TYPE_TOKENS, 287 *NO_PAREN_FUNCTIONS, 288 } 289 290 INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END} 291 292 TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - { 293 TokenType.APPLY, 294 TokenType.ASOF, 295 TokenType.FULL, 296 TokenType.LEFT, 297 TokenType.LOCK, 298 TokenType.NATURAL, 299 TokenType.OFFSET, 300 TokenType.RIGHT, 301 TokenType.WINDOW, 302 } 303 304 COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS} 305 306 UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET} 307 308 TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"} 309 310 FUNC_TOKENS = { 311 TokenType.COMMAND, 312 TokenType.CURRENT_DATE, 313 TokenType.CURRENT_DATETIME, 314 TokenType.CURRENT_TIMESTAMP, 315 TokenType.CURRENT_TIME, 316 TokenType.CURRENT_USER, 317 TokenType.FILTER, 318 TokenType.FIRST, 319 TokenType.FORMAT, 320 TokenType.GLOB, 321 TokenType.IDENTIFIER, 322 TokenType.INDEX, 323 TokenType.ISNULL, 324 TokenType.ILIKE, 325 TokenType.LIKE, 326 TokenType.MERGE, 327 TokenType.OFFSET, 328 TokenType.PRIMARY_KEY, 329 TokenType.RANGE, 330 TokenType.REPLACE, 331 TokenType.RLIKE, 332 TokenType.ROW, 333 TokenType.UNNEST, 334 TokenType.VAR, 335 TokenType.LEFT, 336 TokenType.RIGHT, 337 TokenType.DATE, 338 TokenType.DATETIME, 339 TokenType.TABLE, 340 TokenType.TIMESTAMP, 341 TokenType.TIMESTAMPTZ, 342 TokenType.WINDOW, 343 TokenType.XOR, 344 *TYPE_TOKENS, 345 *SUBQUERY_PREDICATES, 346 } 347 348 CONJUNCTION = { 349 TokenType.AND: exp.And, 350 TokenType.OR: exp.Or, 351 } 352 353 EQUALITY = { 354 TokenType.EQ: exp.EQ, 355 TokenType.NEQ: exp.NEQ, 356 TokenType.NULLSAFE_EQ: exp.NullSafeEQ, 357 
} 358 359 COMPARISON = { 360 TokenType.GT: exp.GT, 361 TokenType.GTE: exp.GTE, 362 TokenType.LT: exp.LT, 363 TokenType.LTE: exp.LTE, 364 } 365 366 BITWISE = { 367 TokenType.AMP: exp.BitwiseAnd, 368 TokenType.CARET: exp.BitwiseXor, 369 TokenType.PIPE: exp.BitwiseOr, 370 TokenType.DPIPE: exp.DPipe, 371 } 372 373 TERM = { 374 TokenType.DASH: exp.Sub, 375 TokenType.PLUS: exp.Add, 376 TokenType.MOD: exp.Mod, 377 TokenType.COLLATE: exp.Collate, 378 } 379 380 FACTOR = { 381 TokenType.DIV: exp.IntDiv, 382 TokenType.LR_ARROW: exp.Distance, 383 TokenType.SLASH: exp.Div, 384 TokenType.STAR: exp.Mul, 385 } 386 387 TIMESTAMPS = { 388 TokenType.TIME, 389 TokenType.TIMESTAMP, 390 TokenType.TIMESTAMPTZ, 391 TokenType.TIMESTAMPLTZ, 392 } 393 394 SET_OPERATIONS = { 395 TokenType.UNION, 396 TokenType.INTERSECT, 397 TokenType.EXCEPT, 398 } 399 400 JOIN_METHODS = { 401 TokenType.NATURAL, 402 TokenType.ASOF, 403 } 404 405 JOIN_SIDES = { 406 TokenType.LEFT, 407 TokenType.RIGHT, 408 TokenType.FULL, 409 } 410 411 JOIN_KINDS = { 412 TokenType.INNER, 413 TokenType.OUTER, 414 TokenType.CROSS, 415 TokenType.SEMI, 416 TokenType.ANTI, 417 } 418 419 JOIN_HINTS: t.Set[str] = set() 420 421 LAMBDAS = { 422 TokenType.ARROW: lambda self, expressions: self.expression( 423 exp.Lambda, 424 this=self._replace_lambda( 425 self._parse_conjunction(), 426 {node.name for node in expressions}, 427 ), 428 expressions=expressions, 429 ), 430 TokenType.FARROW: lambda self, expressions: self.expression( 431 exp.Kwarg, 432 this=exp.var(expressions[0].name), 433 expression=self._parse_conjunction(), 434 ), 435 } 436 437 COLUMN_OPERATORS = { 438 TokenType.DOT: None, 439 TokenType.DCOLON: lambda self, this, to: self.expression( 440 exp.Cast if self.STRICT_CAST else exp.TryCast, 441 this=this, 442 to=to, 443 ), 444 TokenType.ARROW: lambda self, this, path: self.expression( 445 exp.JSONExtract, 446 this=this, 447 expression=path, 448 ), 449 TokenType.DARROW: lambda self, this, path: self.expression( 450 exp.JSONExtractScalar, 451 this=this, 452 expression=path, 453 ), 454 TokenType.HASH_ARROW: lambda self, this, path: self.expression( 455 exp.JSONBExtract, 456 this=this, 457 expression=path, 458 ), 459 TokenType.DHASH_ARROW: lambda self, this, path: self.expression( 460 exp.JSONBExtractScalar, 461 this=this, 462 expression=path, 463 ), 464 TokenType.PLACEHOLDER: lambda self, this, key: self.expression( 465 exp.JSONBContains, 466 this=this, 467 expression=key, 468 ), 469 } 470 471 EXPRESSION_PARSERS = { 472 exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 473 exp.Column: lambda self: self._parse_column(), 474 exp.Condition: lambda self: self._parse_conjunction(), 475 exp.DataType: lambda self: self._parse_types(), 476 exp.Expression: lambda self: self._parse_statement(), 477 exp.From: lambda self: self._parse_from(), 478 exp.Group: lambda self: self._parse_group(), 479 exp.Having: lambda self: self._parse_having(), 480 exp.Identifier: lambda self: self._parse_id_var(), 481 exp.Join: lambda self: self._parse_join(), 482 exp.Lambda: lambda self: self._parse_lambda(), 483 exp.Lateral: lambda self: self._parse_lateral(), 484 exp.Limit: lambda self: self._parse_limit(), 485 exp.Offset: lambda self: self._parse_offset(), 486 exp.Order: lambda self: self._parse_order(), 487 exp.Ordered: lambda self: self._parse_ordered(), 488 exp.Properties: lambda self: self._parse_properties(), 489 exp.Qualify: lambda self: self._parse_qualify(), 490 exp.Returning: lambda self: self._parse_returning(), 491 exp.Sort: lambda self: 
self._parse_sort(exp.Sort, TokenType.SORT_BY), 492 exp.Table: lambda self: self._parse_table_parts(), 493 exp.TableAlias: lambda self: self._parse_table_alias(), 494 exp.Where: lambda self: self._parse_where(), 495 exp.Window: lambda self: self._parse_named_window(), 496 exp.With: lambda self: self._parse_with(), 497 "JOIN_TYPE": lambda self: self._parse_join_parts(), 498 } 499 500 STATEMENT_PARSERS = { 501 TokenType.ALTER: lambda self: self._parse_alter(), 502 TokenType.BEGIN: lambda self: self._parse_transaction(), 503 TokenType.CACHE: lambda self: self._parse_cache(), 504 TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(), 505 TokenType.COMMENT: lambda self: self._parse_comment(), 506 TokenType.CREATE: lambda self: self._parse_create(), 507 TokenType.DELETE: lambda self: self._parse_delete(), 508 TokenType.DESC: lambda self: self._parse_describe(), 509 TokenType.DESCRIBE: lambda self: self._parse_describe(), 510 TokenType.DROP: lambda self: self._parse_drop(), 511 TokenType.FROM: lambda self: exp.select("*").from_( 512 t.cast(exp.From, self._parse_from(skip_from_token=True)) 513 ), 514 TokenType.INSERT: lambda self: self._parse_insert(), 515 TokenType.LOAD: lambda self: self._parse_load(), 516 TokenType.MERGE: lambda self: self._parse_merge(), 517 TokenType.PIVOT: lambda self: self._parse_simplified_pivot(), 518 TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()), 519 TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(), 520 TokenType.SET: lambda self: self._parse_set(), 521 TokenType.UNCACHE: lambda self: self._parse_uncache(), 522 TokenType.UPDATE: lambda self: self._parse_update(), 523 TokenType.USE: lambda self: self.expression( 524 exp.Use, 525 kind=self._match_texts(("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA")) 526 and exp.var(self._prev.text), 527 this=self._parse_table(schema=False), 528 ), 529 } 530 531 UNARY_PARSERS = { 532 TokenType.PLUS: lambda self: self._parse_unary(), # Unary + is handled as a no-op 533 TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()), 534 TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()), 535 TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()), 536 } 537 538 PRIMARY_PARSERS = { 539 TokenType.STRING: lambda self, token: self.expression( 540 exp.Literal, this=token.text, is_string=True 541 ), 542 TokenType.NUMBER: lambda self, token: self.expression( 543 exp.Literal, this=token.text, is_string=False 544 ), 545 TokenType.STAR: lambda self, _: self.expression( 546 exp.Star, **{"except": self._parse_except(), "replace": self._parse_replace()} 547 ), 548 TokenType.NULL: lambda self, _: self.expression(exp.Null), 549 TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True), 550 TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False), 551 TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text), 552 TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text), 553 TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text), 554 TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token), 555 TokenType.NATIONAL_STRING: lambda self, token: self.expression( 556 exp.National, this=token.text 557 ), 558 TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text), 559 TokenType.SESSION_PARAMETER: lambda self, _: 
self._parse_session_parameter(), 560 } 561 562 PLACEHOLDER_PARSERS = { 563 TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder), 564 TokenType.PARAMETER: lambda self: self._parse_parameter(), 565 TokenType.COLON: lambda self: self.expression(exp.Placeholder, this=self._prev.text) 566 if self._match_set((TokenType.NUMBER, TokenType.VAR)) 567 else None, 568 } 569 570 RANGE_PARSERS = { 571 TokenType.BETWEEN: lambda self, this: self._parse_between(this), 572 TokenType.GLOB: binary_range_parser(exp.Glob), 573 TokenType.ILIKE: binary_range_parser(exp.ILike), 574 TokenType.IN: lambda self, this: self._parse_in(this), 575 TokenType.IRLIKE: binary_range_parser(exp.RegexpILike), 576 TokenType.IS: lambda self, this: self._parse_is(this), 577 TokenType.LIKE: binary_range_parser(exp.Like), 578 TokenType.OVERLAPS: binary_range_parser(exp.Overlaps), 579 TokenType.RLIKE: binary_range_parser(exp.RegexpLike), 580 TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo), 581 } 582 583 PROPERTY_PARSERS: t.Dict[str, t.Callable] = { 584 "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty), 585 "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty), 586 "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(), 587 "CHARACTER SET": lambda self: self._parse_character_set(), 588 "CHECKSUM": lambda self: self._parse_checksum(), 589 "CLUSTER BY": lambda self: self._parse_cluster(), 590 "CLUSTERED": lambda self: self._parse_clustered_by(), 591 "COLLATE": lambda self: self._parse_property_assignment(exp.CollateProperty), 592 "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty), 593 "COPY": lambda self: self._parse_copy_property(), 594 "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs), 595 "DEFINER": lambda self: self._parse_definer(), 596 "DETERMINISTIC": lambda self: self.expression( 597 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 598 ), 599 "DISTKEY": lambda self: self._parse_distkey(), 600 "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty), 601 "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty), 602 "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty), 603 "EXTERNAL": lambda self: self.expression(exp.ExternalProperty), 604 "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs), 605 "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 606 "FREESPACE": lambda self: self._parse_freespace(), 607 "IMMUTABLE": lambda self: self.expression( 608 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 609 ), 610 "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs), 611 "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty), 612 "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"), 613 "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"), 614 "LIKE": lambda self: self._parse_create_like(), 615 "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty), 616 "LOCK": lambda self: self._parse_locking(), 617 "LOCKING": lambda self: self._parse_locking(), 618 "LOG": lambda self, **kwargs: self._parse_log(**kwargs), 619 "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty), 620 "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs), 621 "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True), 622 "NO": lambda self: 
self._parse_no_property(), 623 "ON": lambda self: self._parse_on_property(), 624 "ORDER BY": lambda self: self._parse_order(skip_order_token=True), 625 "PARTITION BY": lambda self: self._parse_partitioned_by(), 626 "PARTITIONED BY": lambda self: self._parse_partitioned_by(), 627 "PARTITIONED_BY": lambda self: self._parse_partitioned_by(), 628 "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True), 629 "RANGE": lambda self: self._parse_dict_range(this="RANGE"), 630 "RETURNS": lambda self: self._parse_returns(), 631 "ROW": lambda self: self._parse_row(), 632 "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty), 633 "SET": lambda self: self.expression(exp.SetProperty, multi=False), 634 "SETTINGS": lambda self: self.expression( 635 exp.SettingsProperty, expressions=self._parse_csv(self._parse_set_item) 636 ), 637 "SORTKEY": lambda self: self._parse_sortkey(), 638 "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"), 639 "STABLE": lambda self: self.expression( 640 exp.StabilityProperty, this=exp.Literal.string("STABLE") 641 ), 642 "STORED": lambda self: self._parse_stored(), 643 "TBLPROPERTIES": lambda self: self._parse_wrapped_csv(self._parse_property), 644 "TEMP": lambda self: self.expression(exp.TemporaryProperty), 645 "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty), 646 "TO": lambda self: self._parse_to_table(), 647 "TRANSIENT": lambda self: self.expression(exp.TransientProperty), 648 "TTL": lambda self: self._parse_ttl(), 649 "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 650 "VOLATILE": lambda self: self._parse_volatile_property(), 651 "WITH": lambda self: self._parse_with_property(), 652 } 653 654 CONSTRAINT_PARSERS = { 655 "AUTOINCREMENT": lambda self: self._parse_auto_increment(), 656 "AUTO_INCREMENT": lambda self: self._parse_auto_increment(), 657 "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False), 658 "CHARACTER SET": lambda self: self.expression( 659 exp.CharacterSetColumnConstraint, this=self._parse_var_or_string() 660 ), 661 "CHECK": lambda self: self.expression( 662 exp.CheckColumnConstraint, this=self._parse_wrapped(self._parse_conjunction) 663 ), 664 "COLLATE": lambda self: self.expression( 665 exp.CollateColumnConstraint, this=self._parse_var() 666 ), 667 "COMMENT": lambda self: self.expression( 668 exp.CommentColumnConstraint, this=self._parse_string() 669 ), 670 "COMPRESS": lambda self: self._parse_compress(), 671 "DEFAULT": lambda self: self.expression( 672 exp.DefaultColumnConstraint, this=self._parse_bitwise() 673 ), 674 "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()), 675 "FOREIGN KEY": lambda self: self._parse_foreign_key(), 676 "FORMAT": lambda self: self.expression( 677 exp.DateFormatColumnConstraint, this=self._parse_var_or_string() 678 ), 679 "GENERATED": lambda self: self._parse_generated_as_identity(), 680 "IDENTITY": lambda self: self._parse_auto_increment(), 681 "INLINE": lambda self: self._parse_inline(), 682 "LIKE": lambda self: self._parse_create_like(), 683 "NOT": lambda self: self._parse_not_constraint(), 684 "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True), 685 "ON": lambda self: self._match(TokenType.UPDATE) 686 and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function()), 687 "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()), 688 "PRIMARY KEY": lambda self: self._parse_primary_key(), 
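# -- Extension sketch (not part of this module) -----------------------------
# Dialects typically extend constraint parsing by merging into this table
# rather than replacing it. A minimal sketch, using a hypothetical NOTE
# keyword that reuses the real COMMENT constraint expression:
#
#     class MyParser(Parser):
#         CONSTRAINT_PARSERS = {
#             **Parser.CONSTRAINT_PARSERS,
#             "NOTE": lambda self: self.expression(
#                 exp.CommentColumnConstraint, this=self._parse_string()
#             ),
#         }
# ---------------------------------------------------------------------------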
689 "REFERENCES": lambda self: self._parse_references(match=False), 690 "TITLE": lambda self: self.expression( 691 exp.TitleColumnConstraint, this=self._parse_var_or_string() 692 ), 693 "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]), 694 "UNIQUE": lambda self: self._parse_unique(), 695 "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint), 696 } 697 698 ALTER_PARSERS = { 699 "ADD": lambda self: self._parse_alter_table_add(), 700 "ALTER": lambda self: self._parse_alter_table_alter(), 701 "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()), 702 "DROP": lambda self: self._parse_alter_table_drop(), 703 "RENAME": lambda self: self._parse_alter_table_rename(), 704 } 705 706 SCHEMA_UNNAMED_CONSTRAINTS = {"CHECK", "FOREIGN KEY", "LIKE", "PRIMARY KEY", "UNIQUE"} 707 708 NO_PAREN_FUNCTION_PARSERS = { 709 TokenType.ANY: lambda self: self.expression(exp.Any, this=self._parse_bitwise()), 710 TokenType.CASE: lambda self: self._parse_case(), 711 TokenType.IF: lambda self: self._parse_if(), 712 TokenType.NEXT_VALUE_FOR: lambda self: self.expression( 713 exp.NextValueFor, 714 this=self._parse_column(), 715 order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order), 716 ), 717 } 718 719 FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"} 720 721 FUNCTION_PARSERS = { 722 "ANY_VALUE": lambda self: self._parse_any_value(), 723 "CAST": lambda self: self._parse_cast(self.STRICT_CAST), 724 "CONCAT": lambda self: self._parse_concat(), 725 "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST), 726 "DECODE": lambda self: self._parse_decode(), 727 "EXTRACT": lambda self: self._parse_extract(), 728 "JSON_OBJECT": lambda self: self._parse_json_object(), 729 "LOG": lambda self: self._parse_logarithm(), 730 "MATCH": lambda self: self._parse_match_against(), 731 "OPENJSON": lambda self: self._parse_open_json(), 732 "POSITION": lambda self: self._parse_position(), 733 "SAFE_CAST": lambda self: self._parse_cast(False), 734 "STRING_AGG": lambda self: self._parse_string_agg(), 735 "SUBSTRING": lambda self: self._parse_substring(), 736 "TRIM": lambda self: self._parse_trim(), 737 "TRY_CAST": lambda self: self._parse_cast(False), 738 "TRY_CONVERT": lambda self: self._parse_convert(False), 739 } 740 741 QUERY_MODIFIER_PARSERS = { 742 TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()), 743 TokenType.WHERE: lambda self: ("where", self._parse_where()), 744 TokenType.GROUP_BY: lambda self: ("group", self._parse_group()), 745 TokenType.HAVING: lambda self: ("having", self._parse_having()), 746 TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()), 747 TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()), 748 TokenType.ORDER_BY: lambda self: ("order", self._parse_order()), 749 TokenType.LIMIT: lambda self: ("limit", self._parse_limit()), 750 TokenType.FETCH: lambda self: ("limit", self._parse_limit()), 751 TokenType.OFFSET: lambda self: ("offset", self._parse_offset()), 752 TokenType.FOR: lambda self: ("locks", self._parse_locks()), 753 TokenType.LOCK: lambda self: ("locks", self._parse_locks()), 754 TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 755 TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 756 TokenType.CLUSTER_BY: lambda self: ( 757 "cluster", 758 self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 759 ), 760 TokenType.DISTRIBUTE_BY: lambda self: ( 761 "distribute", 762 
self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY), 763 ), 764 TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)), 765 } 766 767 SET_PARSERS = { 768 "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"), 769 "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"), 770 "SESSION": lambda self: self._parse_set_item_assignment("SESSION"), 771 "TRANSACTION": lambda self: self._parse_set_transaction(), 772 } 773 774 SHOW_PARSERS: t.Dict[str, t.Callable] = {} 775 776 TYPE_LITERAL_PARSERS: t.Dict[exp.DataType.Type, t.Callable] = {} 777 778 MODIFIABLES = (exp.Subquery, exp.Subqueryable, exp.Table) 779 780 DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN} 781 782 PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE} 783 784 TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"} 785 TRANSACTION_CHARACTERISTICS = { 786 "ISOLATION LEVEL REPEATABLE READ", 787 "ISOLATION LEVEL READ COMMITTED", 788 "ISOLATION LEVEL READ UNCOMMITTED", 789 "ISOLATION LEVEL SERIALIZABLE", 790 "READ WRITE", 791 "READ ONLY", 792 } 793 794 INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"} 795 796 CLONE_KINDS = {"TIMESTAMP", "OFFSET", "STATEMENT"} 797 798 TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE} 799 800 WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS} 801 WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER} 802 WINDOW_SIDES = {"FOLLOWING", "PRECEDING"} 803 804 ADD_CONSTRAINT_TOKENS = {TokenType.CONSTRAINT, TokenType.PRIMARY_KEY, TokenType.FOREIGN_KEY} 805 806 STRICT_CAST = True 807 808 # A NULL arg in CONCAT yields NULL by default 809 CONCAT_NULL_OUTPUTS_STRING = False 810 811 PREFIXED_PIVOT_COLUMNS = False 812 IDENTIFY_PIVOT_STRINGS = False 813 814 LOG_BASE_FIRST = True 815 LOG_DEFAULTS_TO_LN = False 816 817 __slots__ = ( 818 "error_level", 819 "error_message_context", 820 "max_errors", 821 "sql", 822 "errors", 823 "_tokens", 824 "_index", 825 "_curr", 826 "_next", 827 "_prev", 828 "_prev_comments", 829 ) 830 831 # Autofilled 832 INDEX_OFFSET: int = 0 833 UNNEST_COLUMN_ONLY: bool = False 834 ALIAS_POST_TABLESAMPLE: bool = False 835 STRICT_STRING_CONCAT = False 836 NULL_ORDERING: str = "nulls_are_small" 837 SHOW_TRIE: t.Dict = {} 838 SET_TRIE: t.Dict = {} 839 FORMAT_MAPPING: t.Dict[str, str] = {} 840 FORMAT_TRIE: t.Dict = {} 841 TIME_MAPPING: t.Dict[str, str] = {} 842 TIME_TRIE: t.Dict = {} 843 844 def __init__( 845 self, 846 error_level: t.Optional[ErrorLevel] = None, 847 error_message_context: int = 100, 848 max_errors: int = 3, 849 ): 850 self.error_level = error_level or ErrorLevel.IMMEDIATE 851 self.error_message_context = error_message_context 852 self.max_errors = max_errors 853 self.reset() 854 855 def reset(self): 856 self.sql = "" 857 self.errors = [] 858 self._tokens = [] 859 self._index = 0 860 self._curr = None 861 self._next = None 862 self._prev = None 863 self._prev_comments = None 864 865 def parse( 866 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 867 ) -> t.List[t.Optional[exp.Expression]]: 868 """ 869 Parses a list of tokens and returns a list of syntax trees, one tree 870 per parsed SQL statement. 871 872 Args: 873 raw_tokens: The list of tokens. 874 sql: The original SQL string, used to produce helpful debug messages. 875 876 Returns: 877 The list of the produced syntax trees. 
878 """ 879 return self._parse( 880 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 881 ) 882 883 def parse_into( 884 self, 885 expression_types: exp.IntoType, 886 raw_tokens: t.List[Token], 887 sql: t.Optional[str] = None, 888 ) -> t.List[t.Optional[exp.Expression]]: 889 """ 890 Parses a list of tokens into a given Expression type. If a collection of Expression 891 types is given instead, this method will try to parse the token list into each one 892 of them, stopping at the first for which the parsing succeeds. 893 894 Args: 895 expression_types: The expression type(s) to try and parse the token list into. 896 raw_tokens: The list of tokens. 897 sql: The original SQL string, used to produce helpful debug messages. 898 899 Returns: 900 The target Expression. 901 """ 902 errors = [] 903 for expression_type in ensure_list(expression_types): 904 parser = self.EXPRESSION_PARSERS.get(expression_type) 905 if not parser: 906 raise TypeError(f"No parser registered for {expression_type}") 907 908 try: 909 return self._parse(parser, raw_tokens, sql) 910 except ParseError as e: 911 e.errors[0]["into_expression"] = expression_type 912 errors.append(e) 913 914 raise ParseError( 915 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 916 errors=merge_errors(errors), 917 ) from errors[-1] 918 919 def _parse( 920 self, 921 parse_method: t.Callable[[Parser], t.Optional[exp.Expression]], 922 raw_tokens: t.List[Token], 923 sql: t.Optional[str] = None, 924 ) -> t.List[t.Optional[exp.Expression]]: 925 self.reset() 926 self.sql = sql or "" 927 928 total = len(raw_tokens) 929 chunks: t.List[t.List[Token]] = [[]] 930 931 for i, token in enumerate(raw_tokens): 932 if token.token_type == TokenType.SEMICOLON: 933 if i < total - 1: 934 chunks.append([]) 935 else: 936 chunks[-1].append(token) 937 938 expressions = [] 939 940 for tokens in chunks: 941 self._index = -1 942 self._tokens = tokens 943 self._advance() 944 945 expressions.append(parse_method(self)) 946 947 if self._index < len(self._tokens): 948 self.raise_error("Invalid expression / Unexpected token") 949 950 self.check_errors() 951 952 return expressions 953 954 def check_errors(self) -> None: 955 """Logs or raises any found errors, depending on the chosen error level setting.""" 956 if self.error_level == ErrorLevel.WARN: 957 for error in self.errors: 958 logger.error(str(error)) 959 elif self.error_level == ErrorLevel.RAISE and self.errors: 960 raise ParseError( 961 concat_messages(self.errors, self.max_errors), 962 errors=merge_errors(self.errors), 963 ) 964 965 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 966 """ 967 Appends an error in the list of recorded errors or raises it, depending on the chosen 968 error level setting. 969 """ 970 token = token or self._curr or self._prev or Token.string("") 971 start = token.start 972 end = token.end + 1 973 start_context = self.sql[max(start - self.error_message_context, 0) : start] 974 highlight = self.sql[start:end] 975 end_context = self.sql[end : end + self.error_message_context] 976 977 error = ParseError.new( 978 f"{message}. 
Line {token.line}, Col: {token.col}.\n" 979 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 980 description=message, 981 line=token.line, 982 col=token.col, 983 start_context=start_context, 984 highlight=highlight, 985 end_context=end_context, 986 ) 987 988 if self.error_level == ErrorLevel.IMMEDIATE: 989 raise error 990 991 self.errors.append(error) 992 993 def expression( 994 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 995 ) -> E: 996 """ 997 Creates a new, validated Expression. 998 999 Args: 1000 exp_class: The expression class to instantiate. 1001 comments: An optional list of comments to attach to the expression. 1002 kwargs: The arguments to set for the expression along with their respective values. 1003 1004 Returns: 1005 The target expression. 1006 """ 1007 instance = exp_class(**kwargs) 1008 instance.add_comments(comments) if comments else self._add_comments(instance) 1009 return self.validate_expression(instance) 1010 1011 def _add_comments(self, expression: t.Optional[exp.Expression]) -> None: 1012 if expression and self._prev_comments: 1013 expression.add_comments(self._prev_comments) 1014 self._prev_comments = None 1015 1016 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1017 """ 1018 Validates an Expression, making sure that all its mandatory arguments are set. 1019 1020 Args: 1021 expression: The expression to validate. 1022 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1023 1024 Returns: 1025 The validated expression. 1026 """ 1027 if self.error_level != ErrorLevel.IGNORE: 1028 for error_message in expression.error_messages(args): 1029 self.raise_error(error_message) 1030 1031 return expression 1032 1033 def _find_sql(self, start: Token, end: Token) -> str: 1034 return self.sql[start.start : end.end + 1] 1035 1036 def _advance(self, times: int = 1) -> None: 1037 self._index += times 1038 self._curr = seq_get(self._tokens, self._index) 1039 self._next = seq_get(self._tokens, self._index + 1) 1040 1041 if self._index > 0: 1042 self._prev = self._tokens[self._index - 1] 1043 self._prev_comments = self._prev.comments 1044 else: 1045 self._prev = None 1046 self._prev_comments = None 1047 1048 def _retreat(self, index: int) -> None: 1049 if index != self._index: 1050 self._advance(index - self._index) 1051 1052 def _parse_command(self) -> exp.Command: 1053 return self.expression(exp.Command, this=self._prev.text, expression=self._parse_string()) 1054 1055 def _parse_comment(self, allow_exists: bool = True) -> exp.Expression: 1056 start = self._prev 1057 exists = self._parse_exists() if allow_exists else None 1058 1059 self._match(TokenType.ON) 1060 1061 kind = self._match_set(self.CREATABLES) and self._prev 1062 if not kind: 1063 return self._parse_as_command(start) 1064 1065 if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1066 this = self._parse_user_defined_function(kind=kind.token_type) 1067 elif kind.token_type == TokenType.TABLE: 1068 this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS) 1069 elif kind.token_type == TokenType.COLUMN: 1070 this = self._parse_column() 1071 else: 1072 this = self._parse_id_var() 1073 1074 self._match(TokenType.IS) 1075 1076 return self.expression( 1077 exp.Comment, this=this, kind=kind.text, expression=self._parse_string(), exists=exists 1078 ) 1079 1080 def _parse_to_table( 1081 self, 1082 ) -> exp.ToTableProperty: 1083 table = self._parse_table_parts(schema=True) 1084 return 
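# -- Illustrative usage (not part of this module) ---------------------------
# How raise_error above behaves depends on error_level: IMMEDIATE raises on
# the first error, RAISE collects errors and raises once parsing finishes,
# WARN only logs them.
#
#     from sqlglot.errors import ErrorLevel
#     from sqlglot.tokens import Tokenizer
#
#     sql = "SELECT * FROM"  # incomplete statement
#     tokens = Tokenizer().tokenize(sql)
#     Parser(error_level=ErrorLevel.RAISE).parse(tokens, sql)  # ParseError
# ---------------------------------------------------------------------------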
self.expression(exp.ToTableProperty, this=table) 1085 1086 # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl 1087 def _parse_ttl(self) -> exp.Expression: 1088 def _parse_ttl_action() -> t.Optional[exp.Expression]: 1089 this = self._parse_bitwise() 1090 1091 if self._match_text_seq("DELETE"): 1092 return self.expression(exp.MergeTreeTTLAction, this=this, delete=True) 1093 if self._match_text_seq("RECOMPRESS"): 1094 return self.expression( 1095 exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise() 1096 ) 1097 if self._match_text_seq("TO", "DISK"): 1098 return self.expression( 1099 exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string() 1100 ) 1101 if self._match_text_seq("TO", "VOLUME"): 1102 return self.expression( 1103 exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string() 1104 ) 1105 1106 return this 1107 1108 expressions = self._parse_csv(_parse_ttl_action) 1109 where = self._parse_where() 1110 group = self._parse_group() 1111 1112 aggregates = None 1113 if group and self._match(TokenType.SET): 1114 aggregates = self._parse_csv(self._parse_set_item) 1115 1116 return self.expression( 1117 exp.MergeTreeTTL, 1118 expressions=expressions, 1119 where=where, 1120 group=group, 1121 aggregates=aggregates, 1122 ) 1123 1124 def _parse_statement(self) -> t.Optional[exp.Expression]: 1125 if self._curr is None: 1126 return None 1127 1128 if self._match_set(self.STATEMENT_PARSERS): 1129 return self.STATEMENT_PARSERS[self._prev.token_type](self) 1130 1131 if self._match_set(Tokenizer.COMMANDS): 1132 return self._parse_command() 1133 1134 expression = self._parse_expression() 1135 expression = self._parse_set_operations(expression) if expression else self._parse_select() 1136 return self._parse_query_modifiers(expression) 1137 1138 def _parse_drop(self) -> exp.Drop | exp.Command: 1139 start = self._prev 1140 temporary = self._match(TokenType.TEMPORARY) 1141 materialized = self._match_text_seq("MATERIALIZED") 1142 1143 kind = self._match_set(self.CREATABLES) and self._prev.text 1144 if not kind: 1145 return self._parse_as_command(start) 1146 1147 return self.expression( 1148 exp.Drop, 1149 comments=start.comments, 1150 exists=self._parse_exists(), 1151 this=self._parse_table(schema=True), 1152 kind=kind, 1153 temporary=temporary, 1154 materialized=materialized, 1155 cascade=self._match_text_seq("CASCADE"), 1156 constraints=self._match_text_seq("CONSTRAINTS"), 1157 purge=self._match_text_seq("PURGE"), 1158 ) 1159 1160 def _parse_exists(self, not_: bool = False) -> t.Optional[bool]: 1161 return ( 1162 self._match(TokenType.IF) 1163 and (not not_ or self._match(TokenType.NOT)) 1164 and self._match(TokenType.EXISTS) 1165 ) 1166 1167 def _parse_create(self) -> exp.Create | exp.Command: 1168 # Note: this can't be None because we've matched a statement parser 1169 start = self._prev 1170 replace = start.text.upper() == "REPLACE" or self._match_pair( 1171 TokenType.OR, TokenType.REPLACE 1172 ) 1173 unique = self._match(TokenType.UNIQUE) 1174 1175 if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False): 1176 self._advance() 1177 1178 properties = None 1179 create_token = self._match_set(self.CREATABLES) and self._prev 1180 1181 if not create_token: 1182 # exp.Properties.Location.POST_CREATE 1183 properties = self._parse_properties() 1184 create_token = self._match_set(self.CREATABLES) and self._prev 1185 1186 if not properties or not create_token: 1187 return self._parse_as_command(start) 1188 1189 exists = 
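# -- Illustrative usage (not part of this module) ---------------------------
# _parse_exists above is how IF [NOT] EXISTS is captured; the flag ends up
# in the "exists" arg of the resulting statement:
#
#     import sqlglot
#
#     drop = sqlglot.parse_one("DROP TABLE IF EXISTS t")
#     assert drop.args.get("exists")
# ---------------------------------------------------------------------------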
self._parse_exists(not_=True) 1190 this = None 1191 expression = None 1192 indexes = None 1193 no_schema_binding = None 1194 begin = None 1195 clone = None 1196 1197 def extend_props(temp_props: t.Optional[exp.Properties]) -> None: 1198 nonlocal properties 1199 if properties and temp_props: 1200 properties.expressions.extend(temp_props.expressions) 1201 elif temp_props: 1202 properties = temp_props 1203 1204 if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1205 this = self._parse_user_defined_function(kind=create_token.token_type) 1206 1207 # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature) 1208 extend_props(self._parse_properties()) 1209 1210 self._match(TokenType.ALIAS) 1211 begin = self._match(TokenType.BEGIN) 1212 return_ = self._match_text_seq("RETURN") 1213 expression = self._parse_statement() 1214 1215 if return_: 1216 expression = self.expression(exp.Return, this=expression) 1217 elif create_token.token_type == TokenType.INDEX: 1218 this = self._parse_index(index=self._parse_id_var()) 1219 elif create_token.token_type in self.DB_CREATABLES: 1220 table_parts = self._parse_table_parts(schema=True) 1221 1222 # exp.Properties.Location.POST_NAME 1223 self._match(TokenType.COMMA) 1224 extend_props(self._parse_properties(before=True)) 1225 1226 this = self._parse_schema(this=table_parts) 1227 1228 # exp.Properties.Location.POST_SCHEMA and POST_WITH 1229 extend_props(self._parse_properties()) 1230 1231 self._match(TokenType.ALIAS) 1232 if not self._match_set(self.DDL_SELECT_TOKENS, advance=False): 1233 # exp.Properties.Location.POST_ALIAS 1234 extend_props(self._parse_properties()) 1235 1236 expression = self._parse_ddl_select() 1237 1238 if create_token.token_type == TokenType.TABLE: 1239 # exp.Properties.Location.POST_EXPRESSION 1240 extend_props(self._parse_properties()) 1241 1242 indexes = [] 1243 while True: 1244 index = self._parse_index() 1245 1246 # exp.Properties.Location.POST_INDEX 1247 extend_props(self._parse_properties()) 1248 1249 if not index: 1250 break 1251 else: 1252 self._match(TokenType.COMMA) 1253 indexes.append(index) 1254 elif create_token.token_type == TokenType.VIEW: 1255 if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"): 1256 no_schema_binding = True 1257 1258 if self._match_text_seq("CLONE"): 1259 clone = self._parse_table(schema=True) 1260 when = self._match_texts({"AT", "BEFORE"}) and self._prev.text.upper() 1261 clone_kind = ( 1262 self._match(TokenType.L_PAREN) 1263 and self._match_texts(self.CLONE_KINDS) 1264 and self._prev.text.upper() 1265 ) 1266 clone_expression = self._match(TokenType.FARROW) and self._parse_bitwise() 1267 self._match(TokenType.R_PAREN) 1268 clone = self.expression( 1269 exp.Clone, this=clone, when=when, kind=clone_kind, expression=clone_expression 1270 ) 1271 1272 return self.expression( 1273 exp.Create, 1274 this=this, 1275 kind=create_token.text, 1276 replace=replace, 1277 unique=unique, 1278 expression=expression, 1279 exists=exists, 1280 properties=properties, 1281 indexes=indexes, 1282 no_schema_binding=no_schema_binding, 1283 begin=begin, 1284 clone=clone, 1285 ) 1286 1287 def _parse_property_before(self) -> t.Optional[exp.Expression]: 1288 # only used for teradata currently 1289 self._match(TokenType.COMMA) 1290 1291 kwargs = { 1292 "no": self._match_text_seq("NO"), 1293 "dual": self._match_text_seq("DUAL"), 1294 "before": self._match_text_seq("BEFORE"), 1295 "default": self._match_text_seq("DEFAULT"), 1296 "local": (self._match_text_seq("LOCAL") and "LOCAL") 1297 or 
(self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"), 1298 "after": self._match_text_seq("AFTER"), 1299 "minimum": self._match_texts(("MIN", "MINIMUM")), 1300 "maximum": self._match_texts(("MAX", "MAXIMUM")), 1301 } 1302 1303 if self._match_texts(self.PROPERTY_PARSERS): 1304 parser = self.PROPERTY_PARSERS[self._prev.text.upper()] 1305 try: 1306 return parser(self, **{k: v for k, v in kwargs.items() if v}) 1307 except TypeError: 1308 self.raise_error(f"Cannot parse property '{self._prev.text}'") 1309 1310 return None 1311 1312 def _parse_property(self) -> t.Optional[exp.Expression]: 1313 if self._match_texts(self.PROPERTY_PARSERS): 1314 return self.PROPERTY_PARSERS[self._prev.text.upper()](self) 1315 1316 if self._match_pair(TokenType.DEFAULT, TokenType.CHARACTER_SET): 1317 return self._parse_character_set(default=True) 1318 1319 if self._match_text_seq("COMPOUND", "SORTKEY"): 1320 return self._parse_sortkey(compound=True) 1321 1322 if self._match_text_seq("SQL", "SECURITY"): 1323 return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER")) 1324 1325 assignment = self._match_pair( 1326 TokenType.VAR, TokenType.EQ, advance=False 1327 ) or self._match_pair(TokenType.STRING, TokenType.EQ, advance=False) 1328 1329 if assignment: 1330 key = self._parse_var_or_string() 1331 self._match(TokenType.EQ) 1332 return self.expression(exp.Property, this=key, value=self._parse_column()) 1333 1334 return None 1335 1336 def _parse_stored(self) -> exp.FileFormatProperty: 1337 self._match(TokenType.ALIAS) 1338 1339 input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None 1340 output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None 1341 1342 return self.expression( 1343 exp.FileFormatProperty, 1344 this=self.expression( 1345 exp.InputOutputFormat, input_format=input_format, output_format=output_format 1346 ) 1347 if input_format or output_format 1348 else self._parse_var_or_string() or self._parse_number() or self._parse_id_var(), 1349 ) 1350 1351 def _parse_property_assignment(self, exp_class: t.Type[E]) -> E: 1352 self._match(TokenType.EQ) 1353 self._match(TokenType.ALIAS) 1354 return self.expression(exp_class, this=self._parse_field()) 1355 1356 def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]: 1357 properties = [] 1358 while True: 1359 if before: 1360 prop = self._parse_property_before() 1361 else: 1362 prop = self._parse_property() 1363 1364 if not prop: 1365 break 1366 for p in ensure_list(prop): 1367 properties.append(p) 1368 1369 if properties: 1370 return self.expression(exp.Properties, expressions=properties) 1371 1372 return None 1373 1374 def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty: 1375 return self.expression( 1376 exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION") 1377 ) 1378 1379 def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty: 1380 if self._index >= 2: 1381 pre_volatile_token = self._tokens[self._index - 2] 1382 else: 1383 pre_volatile_token = None 1384 1385 if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS: 1386 return exp.VolatileProperty() 1387 1388 return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE")) 1389 1390 def _parse_with_property( 1391 self, 1392 ) -> t.Optional[exp.Expression] | t.List[t.Optional[exp.Expression]]: 1393 if self._match(TokenType.L_PAREN, advance=False): 1394 return 
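# -- Illustrative usage (not part of this module) ---------------------------
# Properties collected by _parse_properties above are attached to the
# enclosing statement, e.g. an ENGINE assignment becomes an
# exp.EngineProperty inside the Create expression:
#
#     import sqlglot
#     from sqlglot import exp
#
#     create = sqlglot.parse_one(
#         "CREATE TABLE t (x INT) ENGINE=InnoDB", read="mysql"
#     )
#     assert create.find(exp.EngineProperty) is not None
# ---------------------------------------------------------------------------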
self._parse_wrapped_csv(self._parse_property) 1395 1396 if self._match_text_seq("JOURNAL"): 1397 return self._parse_withjournaltable() 1398 1399 if self._match_text_seq("DATA"): 1400 return self._parse_withdata(no=False) 1401 elif self._match_text_seq("NO", "DATA"): 1402 return self._parse_withdata(no=True) 1403 1404 if not self._next: 1405 return None 1406 1407 return self._parse_withisolatedloading() 1408 1409 # https://dev.mysql.com/doc/refman/8.0/en/create-view.html 1410 def _parse_definer(self) -> t.Optional[exp.DefinerProperty]: 1411 self._match(TokenType.EQ) 1412 1413 user = self._parse_id_var() 1414 self._match(TokenType.PARAMETER) 1415 host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text) 1416 1417 if not user or not host: 1418 return None 1419 1420 return exp.DefinerProperty(this=f"{user}@{host}") 1421 1422 def _parse_withjournaltable(self) -> exp.WithJournalTableProperty: 1423 self._match(TokenType.TABLE) 1424 self._match(TokenType.EQ) 1425 return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts()) 1426 1427 def _parse_log(self, no: bool = False) -> exp.LogProperty: 1428 return self.expression(exp.LogProperty, no=no) 1429 1430 def _parse_journal(self, **kwargs) -> exp.JournalProperty: 1431 return self.expression(exp.JournalProperty, **kwargs) 1432 1433 def _parse_checksum(self) -> exp.ChecksumProperty: 1434 self._match(TokenType.EQ) 1435 1436 on = None 1437 if self._match(TokenType.ON): 1438 on = True 1439 elif self._match_text_seq("OFF"): 1440 on = False 1441 1442 return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT)) 1443 1444 def _parse_cluster(self) -> exp.Cluster: 1445 return self.expression(exp.Cluster, expressions=self._parse_csv(self._parse_ordered)) 1446 1447 def _parse_clustered_by(self) -> exp.ClusteredByProperty: 1448 self._match_text_seq("BY") 1449 1450 self._match_l_paren() 1451 expressions = self._parse_csv(self._parse_column) 1452 self._match_r_paren() 1453 1454 if self._match_text_seq("SORTED", "BY"): 1455 self._match_l_paren() 1456 sorted_by = self._parse_csv(self._parse_ordered) 1457 self._match_r_paren() 1458 else: 1459 sorted_by = None 1460 1461 self._match(TokenType.INTO) 1462 buckets = self._parse_number() 1463 self._match_text_seq("BUCKETS") 1464 1465 return self.expression( 1466 exp.ClusteredByProperty, 1467 expressions=expressions, 1468 sorted_by=sorted_by, 1469 buckets=buckets, 1470 ) 1471 1472 def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]: 1473 if not self._match_text_seq("GRANTS"): 1474 self._retreat(self._index - 1) 1475 return None 1476 1477 return self.expression(exp.CopyGrantsProperty) 1478 1479 def _parse_freespace(self) -> exp.FreespaceProperty: 1480 self._match(TokenType.EQ) 1481 return self.expression( 1482 exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT) 1483 ) 1484 1485 def _parse_mergeblockratio( 1486 self, no: bool = False, default: bool = False 1487 ) -> exp.MergeBlockRatioProperty: 1488 if self._match(TokenType.EQ): 1489 return self.expression( 1490 exp.MergeBlockRatioProperty, 1491 this=self._parse_number(), 1492 percent=self._match(TokenType.PERCENT), 1493 ) 1494 1495 return self.expression(exp.MergeBlockRatioProperty, no=no, default=default) 1496 1497 def _parse_datablocksize( 1498 self, 1499 default: t.Optional[bool] = None, 1500 minimum: t.Optional[bool] = None, 1501 maximum: t.Optional[bool] = None, 1502 ) -> exp.DataBlocksizeProperty: 1503 self._match(TokenType.EQ) 1504 size = 
self._parse_number() 1505 1506 units = None 1507 if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")): 1508 units = self._prev.text 1509 1510 return self.expression( 1511 exp.DataBlocksizeProperty, 1512 size=size, 1513 units=units, 1514 default=default, 1515 minimum=minimum, 1516 maximum=maximum, 1517 ) 1518 1519 def _parse_blockcompression(self) -> exp.BlockCompressionProperty: 1520 self._match(TokenType.EQ) 1521 always = self._match_text_seq("ALWAYS") 1522 manual = self._match_text_seq("MANUAL") 1523 never = self._match_text_seq("NEVER") 1524 default = self._match_text_seq("DEFAULT") 1525 1526 autotemp = None 1527 if self._match_text_seq("AUTOTEMP"): 1528 autotemp = self._parse_schema() 1529 1530 return self.expression( 1531 exp.BlockCompressionProperty, 1532 always=always, 1533 manual=manual, 1534 never=never, 1535 default=default, 1536 autotemp=autotemp, 1537 ) 1538 1539 def _parse_withisolatedloading(self) -> exp.IsolatedLoadingProperty: 1540 no = self._match_text_seq("NO") 1541 concurrent = self._match_text_seq("CONCURRENT") 1542 self._match_text_seq("ISOLATED", "LOADING") 1543 for_all = self._match_text_seq("FOR", "ALL") 1544 for_insert = self._match_text_seq("FOR", "INSERT") 1545 for_none = self._match_text_seq("FOR", "NONE") 1546 return self.expression( 1547 exp.IsolatedLoadingProperty, 1548 no=no, 1549 concurrent=concurrent, 1550 for_all=for_all, 1551 for_insert=for_insert, 1552 for_none=for_none, 1553 ) 1554 1555 def _parse_locking(self) -> exp.LockingProperty: 1556 if self._match(TokenType.TABLE): 1557 kind = "TABLE" 1558 elif self._match(TokenType.VIEW): 1559 kind = "VIEW" 1560 elif self._match(TokenType.ROW): 1561 kind = "ROW" 1562 elif self._match_text_seq("DATABASE"): 1563 kind = "DATABASE" 1564 else: 1565 kind = None 1566 1567 if kind in ("DATABASE", "TABLE", "VIEW"): 1568 this = self._parse_table_parts() 1569 else: 1570 this = None 1571 1572 if self._match(TokenType.FOR): 1573 for_or_in = "FOR" 1574 elif self._match(TokenType.IN): 1575 for_or_in = "IN" 1576 else: 1577 for_or_in = None 1578 1579 if self._match_text_seq("ACCESS"): 1580 lock_type = "ACCESS" 1581 elif self._match_texts(("EXCL", "EXCLUSIVE")): 1582 lock_type = "EXCLUSIVE" 1583 elif self._match_text_seq("SHARE"): 1584 lock_type = "SHARE" 1585 elif self._match_text_seq("READ"): 1586 lock_type = "READ" 1587 elif self._match_text_seq("WRITE"): 1588 lock_type = "WRITE" 1589 elif self._match_text_seq("CHECKSUM"): 1590 lock_type = "CHECKSUM" 1591 else: 1592 lock_type = None 1593 1594 override = self._match_text_seq("OVERRIDE") 1595 1596 return self.expression( 1597 exp.LockingProperty, 1598 this=this, 1599 kind=kind, 1600 for_or_in=for_or_in, 1601 lock_type=lock_type, 1602 override=override, 1603 ) 1604 1605 def _parse_partition_by(self) -> t.List[t.Optional[exp.Expression]]: 1606 if self._match(TokenType.PARTITION_BY): 1607 return self._parse_csv(self._parse_conjunction) 1608 return [] 1609 1610 def _parse_partitioned_by(self) -> exp.PartitionedByProperty: 1611 self._match(TokenType.EQ) 1612 return self.expression( 1613 exp.PartitionedByProperty, 1614 this=self._parse_schema() or self._parse_bracket(self._parse_field()), 1615 ) 1616 1617 def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty: 1618 if self._match_text_seq("AND", "STATISTICS"): 1619 statistics = True 1620 elif self._match_text_seq("AND", "NO", "STATISTICS"): 1621 statistics = False 1622 else: 1623 statistics = None 1624 1625 return self.expression(exp.WithDataProperty, no=no, statistics=statistics) 1626 1627 def _parse_no_property(self) 
-> t.Optional[exp.NoPrimaryIndexProperty]: 1628 if self._match_text_seq("PRIMARY", "INDEX"): 1629 return exp.NoPrimaryIndexProperty() 1630 return None 1631 1632 def _parse_on_property(self) -> t.Optional[exp.Expression]: 1633 if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"): 1634 return exp.OnCommitProperty() 1635 elif self._match_text_seq("COMMIT", "DELETE", "ROWS"): 1636 return exp.OnCommitProperty(delete=True) 1637 return None 1638 1639 def _parse_distkey(self) -> exp.DistKeyProperty: 1640 return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var)) 1641 1642 def _parse_create_like(self) -> t.Optional[exp.LikeProperty]: 1643 table = self._parse_table(schema=True) 1644 1645 options = [] 1646 while self._match_texts(("INCLUDING", "EXCLUDING")): 1647 this = self._prev.text.upper() 1648 1649 id_var = self._parse_id_var() 1650 if not id_var: 1651 return None 1652 1653 options.append( 1654 self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper())) 1655 ) 1656 1657 return self.expression(exp.LikeProperty, this=table, expressions=options) 1658 1659 def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty: 1660 return self.expression( 1661 exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound 1662 ) 1663 1664 def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty: 1665 self._match(TokenType.EQ) 1666 return self.expression( 1667 exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default 1668 ) 1669 1670 def _parse_returns(self) -> exp.ReturnsProperty: 1671 value: t.Optional[exp.Expression] 1672 is_table = self._match(TokenType.TABLE) 1673 1674 if is_table: 1675 if self._match(TokenType.LT): 1676 value = self.expression( 1677 exp.Schema, 1678 this="TABLE", 1679 expressions=self._parse_csv(self._parse_struct_types), 1680 ) 1681 if not self._match(TokenType.GT): 1682 self.raise_error("Expecting >") 1683 else: 1684 value = self._parse_schema(exp.var("TABLE")) 1685 else: 1686 value = self._parse_types() 1687 1688 return self.expression(exp.ReturnsProperty, this=value, is_table=is_table) 1689 1690 def _parse_describe(self) -> exp.Describe: 1691 kind = self._match_set(self.CREATABLES) and self._prev.text 1692 this = self._parse_table() 1693 return self.expression(exp.Describe, this=this, kind=kind) 1694 1695 def _parse_insert(self) -> exp.Insert: 1696 overwrite = self._match(TokenType.OVERWRITE) 1697 ignore = self._match(TokenType.IGNORE) 1698 local = self._match_text_seq("LOCAL") 1699 alternative = None 1700 1701 if self._match_text_seq("DIRECTORY"): 1702 this: t.Optional[exp.Expression] = self.expression( 1703 exp.Directory, 1704 this=self._parse_var_or_string(), 1705 local=local, 1706 row_format=self._parse_row_format(match_row=True), 1707 ) 1708 else: 1709 if self._match(TokenType.OR): 1710 alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text 1711 1712 self._match(TokenType.INTO) 1713 self._match(TokenType.TABLE) 1714 this = self._parse_table(schema=True) 1715 1716 returning = self._parse_returning() 1717 1718 return self.expression( 1719 exp.Insert, 1720 this=this, 1721 exists=self._parse_exists(), 1722 partition=self._parse_partition(), 1723 where=self._match_pair(TokenType.REPLACE, TokenType.WHERE) 1724 and self._parse_conjunction(), 1725 expression=self._parse_ddl_select(), 1726 conflict=self._parse_on_conflict(), 1727 returning=returning or self._parse_returning(), 1728 overwrite=overwrite, 1729 alternative=alternative, 1730 ignore=ignore, 
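# -- Illustrative usage (not part of this module) ---------------------------
# A plain INSERT ... VALUES goes through _parse_insert above, with the
# VALUES rows landing in the "expression" arg:
#
#     import sqlglot
#     from sqlglot import exp
#
#     ins = sqlglot.parse_one("INSERT INTO t (a, b) VALUES (1, 2)")
#     assert isinstance(ins, exp.Insert)
#     assert isinstance(ins.expression, exp.Values)
# ---------------------------------------------------------------------------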
1731 ) 1732 1733 def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]: 1734 conflict = self._match_text_seq("ON", "CONFLICT") 1735 duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY") 1736 1737 if not conflict and not duplicate: 1738 return None 1739 1740 nothing = None 1741 expressions = None 1742 key = None 1743 constraint = None 1744 1745 if conflict: 1746 if self._match_text_seq("ON", "CONSTRAINT"): 1747 constraint = self._parse_id_var() 1748 else: 1749 key = self._parse_csv(self._parse_value) 1750 1751 self._match_text_seq("DO") 1752 if self._match_text_seq("NOTHING"): 1753 nothing = True 1754 else: 1755 self._match(TokenType.UPDATE) 1756 self._match(TokenType.SET) 1757 expressions = self._parse_csv(self._parse_equality) 1758 1759 return self.expression( 1760 exp.OnConflict, 1761 duplicate=duplicate, 1762 expressions=expressions, 1763 nothing=nothing, 1764 key=key, 1765 constraint=constraint, 1766 ) 1767 1768 def _parse_returning(self) -> t.Optional[exp.Returning]: 1769 if not self._match(TokenType.RETURNING): 1770 return None 1771 return self.expression( 1772 exp.Returning, 1773 expressions=self._parse_csv(self._parse_expression), 1774 into=self._match(TokenType.INTO) and self._parse_table_part(), 1775 ) 1776 1777 def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 1778 if not self._match(TokenType.FORMAT): 1779 return None 1780 return self._parse_row_format() 1781 1782 def _parse_row_format( 1783 self, match_row: bool = False 1784 ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 1785 if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT): 1786 return None 1787 1788 if self._match_text_seq("SERDE"): 1789 this = self._parse_string() 1790 1791 serde_properties = None 1792 if self._match(TokenType.SERDE_PROPERTIES): 1793 serde_properties = self.expression( 1794 exp.SerdeProperties, expressions=self._parse_wrapped_csv(self._parse_property) 1795 ) 1796 1797 return self.expression( 1798 exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties 1799 ) 1800 1801 self._match_text_seq("DELIMITED") 1802 1803 kwargs = {} 1804 1805 if self._match_text_seq("FIELDS", "TERMINATED", "BY"): 1806 kwargs["fields"] = self._parse_string() 1807 if self._match_text_seq("ESCAPED", "BY"): 1808 kwargs["escaped"] = self._parse_string() 1809 if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"): 1810 kwargs["collection_items"] = self._parse_string() 1811 if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"): 1812 kwargs["map_keys"] = self._parse_string() 1813 if self._match_text_seq("LINES", "TERMINATED", "BY"): 1814 kwargs["lines"] = self._parse_string() 1815 if self._match_text_seq("NULL", "DEFINED", "AS"): 1816 kwargs["null"] = self._parse_string() 1817 1818 return self.expression(exp.RowFormatDelimitedProperty, **kwargs) # type: ignore 1819 1820 def _parse_load(self) -> exp.LoadData | exp.Command: 1821 if self._match_text_seq("DATA"): 1822 local = self._match_text_seq("LOCAL") 1823 self._match_text_seq("INPATH") 1824 inpath = self._parse_string() 1825 overwrite = self._match(TokenType.OVERWRITE) 1826 self._match_pair(TokenType.INTO, TokenType.TABLE) 1827 1828 return self.expression( 1829 exp.LoadData, 1830 this=self._parse_table(schema=True), 1831 local=local, 1832 overwrite=overwrite, 1833 inpath=inpath, 1834 partition=self._parse_partition(), 1835 input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(), 1836 serde=self._match_text_seq("SERDE") and 
self._parse_string(), 1837 ) 1838 return self._parse_as_command(self._prev) 1839 1840 def _parse_delete(self) -> exp.Delete: 1841 # This handles MySQL's "Multiple-Table Syntax" 1842 # https://dev.mysql.com/doc/refman/8.0/en/delete.html 1843 tables = None 1844 if not self._match(TokenType.FROM, advance=False): 1845 tables = self._parse_csv(self._parse_table) or None 1846 1847 returning = self._parse_returning() 1848 1849 return self.expression( 1850 exp.Delete, 1851 tables=tables, 1852 this=self._match(TokenType.FROM) and self._parse_table(joins=True), 1853 using=self._match(TokenType.USING) and self._parse_table(joins=True), 1854 where=self._parse_where(), 1855 returning=returning or self._parse_returning(), 1856 limit=self._parse_limit(), 1857 ) 1858 1859 def _parse_update(self) -> exp.Update: 1860 this = self._parse_table(alias_tokens=self.UPDATE_ALIAS_TOKENS) 1861 expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality) 1862 returning = self._parse_returning() 1863 return self.expression( 1864 exp.Update, 1865 **{ # type: ignore 1866 "this": this, 1867 "expressions": expressions, 1868 "from": self._parse_from(joins=True), 1869 "where": self._parse_where(), 1870 "returning": returning or self._parse_returning(), 1871 "limit": self._parse_limit(), 1872 }, 1873 ) 1874 1875 def _parse_uncache(self) -> exp.Uncache: 1876 if not self._match(TokenType.TABLE): 1877 self.raise_error("Expecting TABLE after UNCACHE") 1878 1879 return self.expression( 1880 exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True) 1881 ) 1882 1883 def _parse_cache(self) -> exp.Cache: 1884 lazy = self._match_text_seq("LAZY") 1885 self._match(TokenType.TABLE) 1886 table = self._parse_table(schema=True) 1887 1888 options = [] 1889 if self._match_text_seq("OPTIONS"): 1890 self._match_l_paren() 1891 k = self._parse_string() 1892 self._match(TokenType.EQ) 1893 v = self._parse_string() 1894 options = [k, v] 1895 self._match_r_paren() 1896 1897 self._match(TokenType.ALIAS) 1898 return self.expression( 1899 exp.Cache, 1900 this=table, 1901 lazy=lazy, 1902 options=options, 1903 expression=self._parse_select(nested=True), 1904 ) 1905 1906 def _parse_partition(self) -> t.Optional[exp.Partition]: 1907 if not self._match(TokenType.PARTITION): 1908 return None 1909 1910 return self.expression( 1911 exp.Partition, expressions=self._parse_wrapped_csv(self._parse_conjunction) 1912 ) 1913 1914 def _parse_value(self) -> exp.Tuple: 1915 if self._match(TokenType.L_PAREN): 1916 expressions = self._parse_csv(self._parse_conjunction) 1917 self._match_r_paren() 1918 return self.expression(exp.Tuple, expressions=expressions) 1919 1920 # In presto we can have VALUES 1, 2 which results in 1 column & 2 rows. 
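# For example (an illustrative sketch of that behavior):
#
#     import sqlglot
#
#     values = sqlglot.parse_one("VALUES 1, 2", read="presto")
#     assert len(values.expressions) == 2                  # two rows
#     assert len(values.expressions[0].expressions) == 1   # one column each
#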
1921 # https://prestodb.io/docs/current/sql/values.html 1922 return self.expression(exp.Tuple, expressions=[self._parse_conjunction()]) 1923 1924 def _parse_select( 1925 self, nested: bool = False, table: bool = False, parse_subquery_alias: bool = True 1926 ) -> t.Optional[exp.Expression]: 1927 cte = self._parse_with() 1928 if cte: 1929 this = self._parse_statement() 1930 1931 if not this: 1932 self.raise_error("Failed to parse any statement following CTE") 1933 return cte 1934 1935 if "with" in this.arg_types: 1936 this.set("with", cte) 1937 else: 1938 self.raise_error(f"{this.key} does not support CTE") 1939 this = cte 1940 elif self._match(TokenType.SELECT): 1941 comments = self._prev_comments 1942 1943 hint = self._parse_hint() 1944 all_ = self._match(TokenType.ALL) 1945 distinct = self._match(TokenType.DISTINCT) 1946 1947 kind = ( 1948 self._match(TokenType.ALIAS) 1949 and self._match_texts(("STRUCT", "VALUE")) 1950 and self._prev.text 1951 ) 1952 1953 if distinct: 1954 distinct = self.expression( 1955 exp.Distinct, 1956 on=self._parse_value() if self._match(TokenType.ON) else None, 1957 ) 1958 1959 if all_ and distinct: 1960 self.raise_error("Cannot specify both ALL and DISTINCT after SELECT") 1961 1962 limit = self._parse_limit(top=True) 1963 expressions = self._parse_expressions() 1964 1965 this = self.expression( 1966 exp.Select, 1967 kind=kind, 1968 hint=hint, 1969 distinct=distinct, 1970 expressions=expressions, 1971 limit=limit, 1972 ) 1973 this.comments = comments 1974 1975 into = self._parse_into() 1976 if into: 1977 this.set("into", into) 1978 1979 from_ = self._parse_from() 1980 if from_: 1981 this.set("from", from_) 1982 1983 this = self._parse_query_modifiers(this) 1984 elif (table or nested) and self._match(TokenType.L_PAREN): 1985 if self._match(TokenType.PIVOT): 1986 this = self._parse_simplified_pivot() 1987 elif self._match(TokenType.FROM): 1988 this = exp.select("*").from_( 1989 t.cast(exp.From, self._parse_from(skip_from_token=True)) 1990 ) 1991 else: 1992 this = self._parse_table() if table else self._parse_select(nested=True) 1993 this = self._parse_set_operations(self._parse_query_modifiers(this)) 1994 1995 self._match_r_paren() 1996 1997 # We return early here so that the UNION isn't attached to the subquery by the 1998 # following call to _parse_set_operations, but instead becomes the parent node 1999 return self._parse_subquery(this, parse_alias=parse_subquery_alias) 2000 elif self._match(TokenType.VALUES): 2001 this = self.expression( 2002 exp.Values, 2003 expressions=self._parse_csv(self._parse_value), 2004 alias=self._parse_table_alias(), 2005 ) 2006 else: 2007 this = None 2008 2009 return self._parse_set_operations(this) 2010 2011 def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]: 2012 if not skip_with_token and not self._match(TokenType.WITH): 2013 return None 2014 2015 comments = self._prev_comments 2016 recursive = self._match(TokenType.RECURSIVE) 2017 2018 expressions = [] 2019 while True: 2020 expressions.append(self._parse_cte()) 2021 2022 if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH): 2023 break 2024 else: 2025 self._match(TokenType.WITH) 2026 2027 return self.expression( 2028 exp.With, comments=comments, expressions=expressions, recursive=recursive 2029 ) 2030 2031 def _parse_cte(self) -> exp.CTE: 2032 alias = self._parse_table_alias() 2033 if not alias or not alias.this: 2034 self.raise_error("Expected CTE to have alias") 2035 2036 self._match(TokenType.ALIAS) 2037 return self.expression( 2038 
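# -- Illustrative usage (not part of this module) ---------------------------
# The WITH clause parsed above is attached to the statement that follows it
# via this.set("with", cte):
#
#     import sqlglot
#     from sqlglot import exp
#
#     ast = sqlglot.parse_one("WITH t AS (SELECT 1 AS x) SELECT x FROM t")
#     assert isinstance(ast, exp.Select)
#     assert isinstance(ast.args["with"], exp.With)
# ---------------------------------------------------------------------------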
exp.CTE, this=self._parse_wrapped(self._parse_statement), alias=alias 2039 ) 2040 2041 def _parse_table_alias( 2042 self, alias_tokens: t.Optional[t.Collection[TokenType]] = None 2043 ) -> t.Optional[exp.TableAlias]: 2044 any_token = self._match(TokenType.ALIAS) 2045 alias = ( 2046 self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 2047 or self._parse_string_as_identifier() 2048 ) 2049 2050 index = self._index 2051 if self._match(TokenType.L_PAREN): 2052 columns = self._parse_csv(self._parse_function_parameter) 2053 self._match_r_paren() if columns else self._retreat(index) 2054 else: 2055 columns = None 2056 2057 if not alias and not columns: 2058 return None 2059 2060 return self.expression(exp.TableAlias, this=alias, columns=columns) 2061 2062 def _parse_subquery( 2063 self, this: t.Optional[exp.Expression], parse_alias: bool = True 2064 ) -> t.Optional[exp.Subquery]: 2065 if not this: 2066 return None 2067 2068 return self.expression( 2069 exp.Subquery, 2070 this=this, 2071 pivots=self._parse_pivots(), 2072 alias=self._parse_table_alias() if parse_alias else None, 2073 ) 2074 2075 def _parse_query_modifiers( 2076 self, this: t.Optional[exp.Expression] 2077 ) -> t.Optional[exp.Expression]: 2078 if isinstance(this, self.MODIFIABLES): 2079 for join in iter(self._parse_join, None): 2080 this.append("joins", join) 2081 for lateral in iter(self._parse_lateral, None): 2082 this.append("laterals", lateral) 2083 2084 while True: 2085 if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False): 2086 parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type] 2087 key, expression = parser(self) 2088 2089 if expression: 2090 this.set(key, expression) 2091 if key == "limit": 2092 offset = expression.args.pop("offset", None) 2093 if offset: 2094 this.set("offset", exp.Offset(expression=offset)) 2095 continue 2096 break 2097 return this 2098 2099 def _parse_hint(self) -> t.Optional[exp.Hint]: 2100 if self._match(TokenType.HINT): 2101 hints = [] 2102 for hint in iter(lambda: self._parse_csv(self._parse_function), []): 2103 hints.extend(hint) 2104 2105 if not self._match_pair(TokenType.STAR, TokenType.SLASH): 2106 self.raise_error("Expected */ after HINT") 2107 2108 return self.expression(exp.Hint, expressions=hints) 2109 2110 return None 2111 2112 def _parse_into(self) -> t.Optional[exp.Into]: 2113 if not self._match(TokenType.INTO): 2114 return None 2115 2116 temp = self._match(TokenType.TEMPORARY) 2117 unlogged = self._match_text_seq("UNLOGGED") 2118 self._match(TokenType.TABLE) 2119 2120 return self.expression( 2121 exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged 2122 ) 2123 2124 def _parse_from( 2125 self, joins: bool = False, skip_from_token: bool = False 2126 ) -> t.Optional[exp.From]: 2127 if not skip_from_token and not self._match(TokenType.FROM): 2128 return None 2129 2130 return self.expression( 2131 exp.From, comments=self._prev_comments, this=self._parse_table(joins=joins) 2132 ) 2133 2134 def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]: 2135 if not self._match(TokenType.MATCH_RECOGNIZE): 2136 return None 2137 2138 self._match_l_paren() 2139 2140 partition = self._parse_partition_by() 2141 order = self._parse_order() 2142 measures = self._parse_expressions() if self._match_text_seq("MEASURES") else None 2143 2144 if self._match_text_seq("ONE", "ROW", "PER", "MATCH"): 2145 rows = exp.var("ONE ROW PER MATCH") 2146 elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"): 2147 text = "ALL ROWS PER 
MATCH" 2148 if self._match_text_seq("SHOW", "EMPTY", "MATCHES"): 2149 text += f" SHOW EMPTY MATCHES" 2150 elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"): 2151 text += f" OMIT EMPTY MATCHES" 2152 elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"): 2153 text += f" WITH UNMATCHED ROWS" 2154 rows = exp.var(text) 2155 else: 2156 rows = None 2157 2158 if self._match_text_seq("AFTER", "MATCH", "SKIP"): 2159 text = "AFTER MATCH SKIP" 2160 if self._match_text_seq("PAST", "LAST", "ROW"): 2161 text += f" PAST LAST ROW" 2162 elif self._match_text_seq("TO", "NEXT", "ROW"): 2163 text += f" TO NEXT ROW" 2164 elif self._match_text_seq("TO", "FIRST"): 2165 text += f" TO FIRST {self._advance_any().text}" # type: ignore 2166 elif self._match_text_seq("TO", "LAST"): 2167 text += f" TO LAST {self._advance_any().text}" # type: ignore 2168 after = exp.var(text) 2169 else: 2170 after = None 2171 2172 if self._match_text_seq("PATTERN"): 2173 self._match_l_paren() 2174 2175 if not self._curr: 2176 self.raise_error("Expecting )", self._curr) 2177 2178 paren = 1 2179 start = self._curr 2180 2181 while self._curr and paren > 0: 2182 if self._curr.token_type == TokenType.L_PAREN: 2183 paren += 1 2184 if self._curr.token_type == TokenType.R_PAREN: 2185 paren -= 1 2186 2187 end = self._prev 2188 self._advance() 2189 2190 if paren > 0: 2191 self.raise_error("Expecting )", self._curr) 2192 2193 pattern = exp.var(self._find_sql(start, end)) 2194 else: 2195 pattern = None 2196 2197 define = ( 2198 self._parse_csv( 2199 lambda: self.expression( 2200 exp.Alias, 2201 alias=self._parse_id_var(any_token=True), 2202 this=self._match(TokenType.ALIAS) and self._parse_conjunction(), 2203 ) 2204 ) 2205 if self._match_text_seq("DEFINE") 2206 else None 2207 ) 2208 2209 self._match_r_paren() 2210 2211 return self.expression( 2212 exp.MatchRecognize, 2213 partition_by=partition, 2214 order=order, 2215 measures=measures, 2216 rows=rows, 2217 after=after, 2218 pattern=pattern, 2219 define=define, 2220 alias=self._parse_table_alias(), 2221 ) 2222 2223 def _parse_lateral(self) -> t.Optional[exp.Lateral]: 2224 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY) 2225 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY) 2226 2227 if outer_apply or cross_apply: 2228 this = self._parse_select(table=True) 2229 view = None 2230 outer = not cross_apply 2231 elif self._match(TokenType.LATERAL): 2232 this = self._parse_select(table=True) 2233 view = self._match(TokenType.VIEW) 2234 outer = self._match(TokenType.OUTER) 2235 else: 2236 return None 2237 2238 if not this: 2239 this = self._parse_function() or self._parse_id_var(any_token=False) 2240 while self._match(TokenType.DOT): 2241 this = exp.Dot( 2242 this=this, 2243 expression=self._parse_function() or self._parse_id_var(any_token=False), 2244 ) 2245 2246 if view: 2247 table = self._parse_id_var(any_token=False) 2248 columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else [] 2249 table_alias: t.Optional[exp.TableAlias] = self.expression( 2250 exp.TableAlias, this=table, columns=columns 2251 ) 2252 elif isinstance(this, exp.Subquery) and this.alias: 2253 # Ensures parity between the Subquery's and the Lateral's "alias" args 2254 table_alias = this.args["alias"].copy() 2255 else: 2256 table_alias = self._parse_table_alias() 2257 2258 return self.expression(exp.Lateral, this=this, view=view, outer=outer, alias=table_alias) 2259 2260 def _parse_join_parts( 2261 self, 2262 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], 
t.Optional[Token]]: 2263 return ( 2264 self._match_set(self.JOIN_METHODS) and self._prev, 2265 self._match_set(self.JOIN_SIDES) and self._prev, 2266 self._match_set(self.JOIN_KINDS) and self._prev, 2267 ) 2268 2269 def _parse_join( 2270 self, skip_join_token: bool = False, parse_bracket: bool = False 2271 ) -> t.Optional[exp.Join]: 2272 if self._match(TokenType.COMMA): 2273 return self.expression(exp.Join, this=self._parse_table()) 2274 2275 index = self._index 2276 method, side, kind = self._parse_join_parts() 2277 hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None 2278 join = self._match(TokenType.JOIN) 2279 2280 if not skip_join_token and not join: 2281 self._retreat(index) 2282 kind = None 2283 method = None 2284 side = None 2285 2286 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False) 2287 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False) 2288 2289 if not skip_join_token and not join and not outer_apply and not cross_apply: 2290 return None 2291 2292 if outer_apply: 2293 side = Token(TokenType.LEFT, "LEFT") 2294 2295 kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)} 2296 2297 if method: 2298 kwargs["method"] = method.text 2299 if side: 2300 kwargs["side"] = side.text 2301 if kind: 2302 kwargs["kind"] = kind.text 2303 if hint: 2304 kwargs["hint"] = hint 2305 2306 if self._match(TokenType.ON): 2307 kwargs["on"] = self._parse_conjunction() 2308 elif self._match(TokenType.USING): 2309 kwargs["using"] = self._parse_wrapped_id_vars() 2310 elif not (kind and kind.token_type == TokenType.CROSS): 2311 index = self._index 2312 joins = self._parse_joins() 2313 2314 if joins and self._match(TokenType.ON): 2315 kwargs["on"] = self._parse_conjunction() 2316 elif joins and self._match(TokenType.USING): 2317 kwargs["using"] = self._parse_wrapped_id_vars() 2318 else: 2319 joins = None 2320 self._retreat(index) 2321 2322 kwargs["this"].set("joins", joins) 2323 2324 return self.expression(exp.Join, **kwargs) 2325 2326 def _parse_index( 2327 self, 2328 index: t.Optional[exp.Expression] = None, 2329 ) -> t.Optional[exp.Index]: 2330 if index: 2331 unique = None 2332 primary = None 2333 amp = None 2334 2335 self._match(TokenType.ON) 2336 self._match(TokenType.TABLE) # hive 2337 table = self._parse_table_parts(schema=True) 2338 else: 2339 unique = self._match(TokenType.UNIQUE) 2340 primary = self._match_text_seq("PRIMARY") 2341 amp = self._match_text_seq("AMP") 2342 2343 if not self._match(TokenType.INDEX): 2344 return None 2345 2346 index = self._parse_id_var() 2347 table = None 2348 2349 using = self._parse_field() if self._match(TokenType.USING) else None 2350 2351 if self._match(TokenType.L_PAREN, advance=False): 2352 columns = self._parse_wrapped_csv(self._parse_ordered) 2353 else: 2354 columns = None 2355 2356 return self.expression( 2357 exp.Index, 2358 this=index, 2359 table=table, 2360 using=using, 2361 columns=columns, 2362 unique=unique, 2363 primary=primary, 2364 amp=amp, 2365 partition_by=self._parse_partition_by(), 2366 ) 2367 2368 def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]: 2369 hints: t.List[exp.Expression] = [] 2370 if self._match_pair(TokenType.WITH, TokenType.L_PAREN): 2371 # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16 2372 hints.append( 2373 self.expression( 2374 exp.WithTableHint, 2375 expressions=self._parse_csv( 2376 lambda: self._parse_function() or self._parse_var(any_token=True) 2377 ), 2378 ) 2379 ) 2380 
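# e.g. (illustrative): in T-SQL, "SELECT * FROM t WITH (NOLOCK)" produces a
# single exp.WithTableHint whose expressions carry the NOLOCK identifier.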
self._match_r_paren() 2381 else: 2382 # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html 2383 while self._match_set(self.TABLE_INDEX_HINT_TOKENS): 2384 hint = exp.IndexTableHint(this=self._prev.text.upper()) 2385 2386 self._match_texts({"INDEX", "KEY"}) 2387 if self._match(TokenType.FOR): 2388 hint.set("target", self._advance_any() and self._prev.text.upper()) 2389 2390 hint.set("expressions", self._parse_wrapped_id_vars()) 2391 hints.append(hint) 2392 2393 return hints or None 2394 2395 def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]: 2396 return ( 2397 (not schema and self._parse_function(optional_parens=False)) 2398 or self._parse_id_var(any_token=False) 2399 or self._parse_string_as_identifier() 2400 or self._parse_placeholder() 2401 ) 2402 2403 def _parse_table_parts(self, schema: bool = False) -> exp.Table: 2404 catalog = None 2405 db = None 2406 table = self._parse_table_part(schema=schema) 2407 2408 while self._match(TokenType.DOT): 2409 if catalog: 2410 # This allows nesting the table in arbitrarily many dot expressions if needed 2411 table = self.expression( 2412 exp.Dot, this=table, expression=self._parse_table_part(schema=schema) 2413 ) 2414 else: 2415 catalog = db 2416 db = table 2417 table = self._parse_table_part(schema=schema) 2418 2419 if not table: 2420 self.raise_error(f"Expected table name but got {self._curr}") 2421 2422 return self.expression( 2423 exp.Table, this=table, db=db, catalog=catalog, pivots=self._parse_pivots() 2424 ) 2425 2426 def _parse_table( 2427 self, 2428 schema: bool = False, 2429 joins: bool = False, 2430 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 2431 parse_bracket: bool = False, 2432 ) -> t.Optional[exp.Expression]: 2433 lateral = self._parse_lateral() 2434 if lateral: 2435 return lateral 2436 2437 unnest = self._parse_unnest() 2438 if unnest: 2439 return unnest 2440 2441 values = self._parse_derived_table_values() 2442 if values: 2443 return values 2444 2445 subquery = self._parse_select(table=True) 2446 if subquery: 2447 if not subquery.args.get("pivots"): 2448 subquery.set("pivots", self._parse_pivots()) 2449 return subquery 2450 2451 bracket = parse_bracket and self._parse_bracket(None) 2452 bracket = self.expression(exp.Table, this=bracket) if bracket else None 2453 this: exp.Expression = bracket or self._parse_table_parts(schema=schema) 2454 2455 if schema: 2456 return self._parse_schema(this=this) 2457 2458 if self.ALIAS_POST_TABLESAMPLE: 2459 table_sample = self._parse_table_sample() 2460 2461 alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 2462 if alias: 2463 this.set("alias", alias) 2464 2465 if not this.args.get("pivots"): 2466 this.set("pivots", self._parse_pivots()) 2467 2468 this.set("hints", self._parse_table_hints()) 2469 2470 if not self.ALIAS_POST_TABLESAMPLE: 2471 table_sample = self._parse_table_sample() 2472 2473 if table_sample: 2474 table_sample.set("this", this) 2475 this = table_sample 2476 2477 if joins: 2478 for join in iter(self._parse_join, None): 2479 this.append("joins", join) 2480 2481 return this 2482 2483 def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]: 2484 if not self._match(TokenType.UNNEST): 2485 return None 2486 2487 expressions = self._parse_wrapped_csv(self._parse_type) 2488 ordinality = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 2489 2490 alias = self._parse_table_alias() if with_alias else None 2491 2492 if alias and self.UNNEST_COLUMN_ONLY: 2493 if alias.args.get("columns"): 
2494 self.raise_error("Unexpected extra column alias in unnest.") 2495 2496 alias.set("columns", [alias.this]) 2497 alias.set("this", None) 2498 2499 offset = None 2500 if self._match_pair(TokenType.WITH, TokenType.OFFSET): 2501 self._match(TokenType.ALIAS) 2502 offset = self._parse_id_var() or exp.to_identifier("offset") 2503 2504 return self.expression( 2505 exp.Unnest, expressions=expressions, ordinality=ordinality, alias=alias, offset=offset 2506 ) 2507 2508 def _parse_derived_table_values(self) -> t.Optional[exp.Values]: 2509 is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES) 2510 if not is_derived and not self._match(TokenType.VALUES): 2511 return None 2512 2513 expressions = self._parse_csv(self._parse_value) 2514 alias = self._parse_table_alias() 2515 2516 if is_derived: 2517 self._match_r_paren() 2518 2519 return self.expression( 2520 exp.Values, expressions=expressions, alias=alias or self._parse_table_alias() 2521 ) 2522 2523 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 2524 if not self._match(TokenType.TABLE_SAMPLE) and not ( 2525 as_modifier and self._match_text_seq("USING", "SAMPLE") 2526 ): 2527 return None 2528 2529 bucket_numerator = None 2530 bucket_denominator = None 2531 bucket_field = None 2532 percent = None 2533 rows = None 2534 size = None 2535 seed = None 2536 2537 kind = ( 2538 self._prev.text if self._prev.token_type == TokenType.TABLE_SAMPLE else "USING SAMPLE" 2539 ) 2540 method = self._parse_var(tokens=(TokenType.ROW,)) 2541 2542 self._match(TokenType.L_PAREN) 2543 2544 num = self._parse_number() 2545 2546 if self._match_text_seq("BUCKET"): 2547 bucket_numerator = self._parse_number() 2548 self._match_text_seq("OUT", "OF") 2549 bucket_denominator = self._parse_number() 2550 self._match(TokenType.ON) 2551 bucket_field = self._parse_field() 2552 elif self._match_set((TokenType.PERCENT, TokenType.MOD)): 2553 percent = num 2554 elif self._match(TokenType.ROWS): 2555 rows = num 2556 else: 2557 size = num 2558 2559 self._match(TokenType.R_PAREN) 2560 2561 if self._match(TokenType.L_PAREN): 2562 method = self._parse_var() 2563 seed = self._match(TokenType.COMMA) and self._parse_number() 2564 self._match_r_paren() 2565 elif self._match_texts(("SEED", "REPEATABLE")): 2566 seed = self._parse_wrapped(self._parse_number) 2567 2568 return self.expression( 2569 exp.TableSample, 2570 method=method, 2571 bucket_numerator=bucket_numerator, 2572 bucket_denominator=bucket_denominator, 2573 bucket_field=bucket_field, 2574 percent=percent, 2575 rows=rows, 2576 size=size, 2577 seed=seed, 2578 kind=kind, 2579 ) 2580 2581 def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]: 2582 return list(iter(self._parse_pivot, None)) or None 2583 2584 def _parse_joins(self) -> t.Optional[t.List[exp.Join]]: 2585 return list(iter(self._parse_join, None)) or None 2586 2587 # https://duckdb.org/docs/sql/statements/pivot 2588 def _parse_simplified_pivot(self) -> exp.Pivot: 2589 def _parse_on() -> t.Optional[exp.Expression]: 2590 this = self._parse_bitwise() 2591 return self._parse_in(this) if self._match(TokenType.IN) else this 2592 2593 this = self._parse_table() 2594 expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on) 2595 using = self._match(TokenType.USING) and self._parse_csv( 2596 lambda: self._parse_alias(self._parse_function()) 2597 ) 2598 group = self._parse_group() 2599 return self.expression( 2600 exp.Pivot, this=this, expressions=expressions, using=using, group=group 2601 ) 2602 2603
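# Illustrative sketch (not part of the original source): the simplified PIVOT
# statement handled by _parse_simplified_pivot above follows DuckDB's syntax
# and can be exercised through the public API, assuming the duckdb dialect:
#
#     >>> import sqlglot
#     >>> from sqlglot import exp
#     >>> ast = sqlglot.parse_one(
#     ...     "PIVOT cities ON year USING SUM(population)", read="duckdb"
#     ... )
#     >>> isinstance(ast.find(exp.Pivot), exp.Pivot)
#     True
#
# The ON list lands in the pivot's "expressions" arg, the USING aggregations
# in "using", and any trailing GROUP BY in "group".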
def _parse_pivot(self) -> t.Optional[exp.Pivot]: 2604 index = self._index 2605 2606 if self._match(TokenType.PIVOT): 2607 unpivot = False 2608 elif self._match(TokenType.UNPIVOT): 2609 unpivot = True 2610 else: 2611 return None 2612 2613 expressions = [] 2614 field = None 2615 2616 if not self._match(TokenType.L_PAREN): 2617 self._retreat(index) 2618 return None 2619 2620 if unpivot: 2621 expressions = self._parse_csv(self._parse_column) 2622 else: 2623 expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function())) 2624 2625 if not expressions: 2626 self.raise_error("Failed to parse PIVOT's aggregation list") 2627 2628 if not self._match(TokenType.FOR): 2629 self.raise_error("Expecting FOR") 2630 2631 value = self._parse_column() 2632 2633 if not self._match(TokenType.IN): 2634 self.raise_error("Expecting IN") 2635 2636 field = self._parse_in(value, alias=True) 2637 2638 self._match_r_paren() 2639 2640 pivot = self.expression(exp.Pivot, expressions=expressions, field=field, unpivot=unpivot) 2641 2642 if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False): 2643 pivot.set("alias", self._parse_table_alias()) 2644 2645 if not unpivot: 2646 names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions)) 2647 2648 columns: t.List[exp.Expression] = [] 2649 for fld in pivot.args["field"].expressions: 2650 field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name 2651 for name in names: 2652 if self.PREFIXED_PIVOT_COLUMNS: 2653 name = f"{name}_{field_name}" if name else field_name 2654 else: 2655 name = f"{field_name}_{name}" if name else field_name 2656 2657 columns.append(exp.to_identifier(name)) 2658 2659 pivot.set("columns", columns) 2660 2661 return pivot 2662 2663 def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]: 2664 return [agg.alias for agg in aggregations] 2665 2666 def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]: 2667 if not skip_where_token and not self._match(TokenType.WHERE): 2668 return None 2669 2670 return self.expression( 2671 exp.Where, comments=self._prev_comments, this=self._parse_conjunction() 2672 ) 2673 2674 def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]: 2675 if not skip_group_by_token and not self._match(TokenType.GROUP_BY): 2676 return None 2677 2678 elements = defaultdict(list) 2679 2680 if self._match(TokenType.ALL): 2681 return self.expression(exp.Group, all=True) 2682 2683 while True: 2684 expressions = self._parse_csv(self._parse_conjunction) 2685 if expressions: 2686 elements["expressions"].extend(expressions) 2687 2688 grouping_sets = self._parse_grouping_sets() 2689 if grouping_sets: 2690 elements["grouping_sets"].extend(grouping_sets) 2691 2692 rollup = None 2693 cube = None 2694 totals = None 2695 2696 with_ = self._match(TokenType.WITH) 2697 if self._match(TokenType.ROLLUP): 2698 rollup = with_ or self._parse_wrapped_csv(self._parse_column) 2699 elements["rollup"].extend(ensure_list(rollup)) 2700 2701 if self._match(TokenType.CUBE): 2702 cube = with_ or self._parse_wrapped_csv(self._parse_column) 2703 elements["cube"].extend(ensure_list(cube)) 2704 2705 if self._match_text_seq("TOTALS"): 2706 totals = True 2707 elements["totals"] = True # type: ignore 2708 2709 if not (grouping_sets or rollup or cube or totals): 2710 break 2711 2712 return self.expression(exp.Group, **elements) # type: ignore 2713 2714 def _parse_grouping_sets(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]: 2715 if 
not self._match(TokenType.GROUPING_SETS): 2716 return None 2717 2718 return self._parse_wrapped_csv(self._parse_grouping_set) 2719 2720 def _parse_grouping_set(self) -> t.Optional[exp.Expression]: 2721 if self._match(TokenType.L_PAREN): 2722 grouping_set = self._parse_csv(self._parse_column) 2723 self._match_r_paren() 2724 return self.expression(exp.Tuple, expressions=grouping_set) 2725 2726 return self._parse_column() 2727 2728 def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]: 2729 if not skip_having_token and not self._match(TokenType.HAVING): 2730 return None 2731 return self.expression(exp.Having, this=self._parse_conjunction()) 2732 2733 def _parse_qualify(self) -> t.Optional[exp.Qualify]: 2734 if not self._match(TokenType.QUALIFY): 2735 return None 2736 return self.expression(exp.Qualify, this=self._parse_conjunction()) 2737 2738 def _parse_order( 2739 self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False 2740 ) -> t.Optional[exp.Expression]: 2741 if not skip_order_token and not self._match(TokenType.ORDER_BY): 2742 return this 2743 2744 return self.expression( 2745 exp.Order, this=this, expressions=self._parse_csv(self._parse_ordered) 2746 ) 2747 2748 def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]: 2749 if not self._match(token): 2750 return None 2751 return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered)) 2752 2753 def _parse_ordered(self) -> exp.Ordered: 2754 this = self._parse_conjunction() 2755 self._match(TokenType.ASC) 2756 2757 is_desc = self._match(TokenType.DESC) 2758 is_nulls_first = self._match_text_seq("NULLS", "FIRST") 2759 is_nulls_last = self._match_text_seq("NULLS", "LAST") 2760 desc = is_desc or False 2761 asc = not desc 2762 nulls_first = is_nulls_first or False 2763 explicitly_null_ordered = is_nulls_first or is_nulls_last 2764 2765 if ( 2766 not explicitly_null_ordered 2767 and ( 2768 (asc and self.NULL_ORDERING == "nulls_are_small") 2769 or (desc and self.NULL_ORDERING != "nulls_are_small") 2770 ) 2771 and self.NULL_ORDERING != "nulls_are_last" 2772 ): 2773 nulls_first = True 2774 2775 return self.expression(exp.Ordered, this=this, desc=desc, nulls_first=nulls_first) 2776 2777 def _parse_limit( 2778 self, this: t.Optional[exp.Expression] = None, top: bool = False 2779 ) -> t.Optional[exp.Expression]: 2780 if self._match(TokenType.TOP if top else TokenType.LIMIT): 2781 comments = self._prev_comments 2782 if top: 2783 limit_paren = self._match(TokenType.L_PAREN) 2784 expression = self._parse_number() 2785 2786 if limit_paren: 2787 self._match_r_paren() 2788 else: 2789 expression = self._parse_term() 2790 2791 if self._match(TokenType.COMMA): 2792 offset = expression 2793 expression = self._parse_term() 2794 else: 2795 offset = None 2796 2797 limit_exp = self.expression( 2798 exp.Limit, this=this, expression=expression, offset=offset, comments=comments 2799 ) 2800 2801 return limit_exp 2802 2803 if self._match(TokenType.FETCH): 2804 direction = self._match_set((TokenType.FIRST, TokenType.NEXT)) 2805 direction = self._prev.text if direction else "FIRST" 2806 2807 count = self._parse_number() 2808 percent = self._match(TokenType.PERCENT) 2809 2810 self._match_set((TokenType.ROW, TokenType.ROWS)) 2811 2812 only = self._match_text_seq("ONLY") 2813 with_ties = self._match_text_seq("WITH", "TIES") 2814 2815 if only and with_ties: 2816 self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause") 2817 2818 return self.expression( 2819 
exp.Fetch, 2820 direction=direction, 2821 count=count, 2822 percent=percent, 2823 with_ties=with_ties, 2824 ) 2825 2826 return this 2827 2828 def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 2829 if not self._match(TokenType.OFFSET): 2830 return this 2831 2832 count = self._parse_term() 2833 self._match_set((TokenType.ROW, TokenType.ROWS)) 2834 return self.expression(exp.Offset, this=this, expression=count) 2835 2836 def _parse_locks(self) -> t.List[exp.Lock]: 2837 locks = [] 2838 while True: 2839 if self._match_text_seq("FOR", "UPDATE"): 2840 update = True 2841 elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq( 2842 "LOCK", "IN", "SHARE", "MODE" 2843 ): 2844 update = False 2845 else: 2846 break 2847 2848 expressions = None 2849 if self._match_text_seq("OF"): 2850 expressions = self._parse_csv(lambda: self._parse_table(schema=True)) 2851 2852 wait: t.Optional[bool | exp.Expression] = None 2853 if self._match_text_seq("NOWAIT"): 2854 wait = True 2855 elif self._match_text_seq("WAIT"): 2856 wait = self._parse_primary() 2857 elif self._match_text_seq("SKIP", "LOCKED"): 2858 wait = False 2859 2860 locks.append( 2861 self.expression(exp.Lock, update=update, expressions=expressions, wait=wait) 2862 ) 2863 2864 return locks 2865 2866 def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 2867 if not self._match_set(self.SET_OPERATIONS): 2868 return this 2869 2870 token_type = self._prev.token_type 2871 2872 if token_type == TokenType.UNION: 2873 expression = exp.Union 2874 elif token_type == TokenType.EXCEPT: 2875 expression = exp.Except 2876 else: 2877 expression = exp.Intersect 2878 2879 return self.expression( 2880 expression, 2881 this=this, 2882 distinct=self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL), 2883 expression=self._parse_set_operations(self._parse_select(nested=True)), 2884 ) 2885 2886 def _parse_expression(self) -> t.Optional[exp.Expression]: 2887 return self._parse_alias(self._parse_conjunction()) 2888 2889 def _parse_conjunction(self) -> t.Optional[exp.Expression]: 2890 return self._parse_tokens(self._parse_equality, self.CONJUNCTION) 2891 2892 def _parse_equality(self) -> t.Optional[exp.Expression]: 2893 return self._parse_tokens(self._parse_comparison, self.EQUALITY) 2894 2895 def _parse_comparison(self) -> t.Optional[exp.Expression]: 2896 return self._parse_tokens(self._parse_range, self.COMPARISON) 2897 2898 def _parse_range(self) -> t.Optional[exp.Expression]: 2899 this = self._parse_bitwise() 2900 negate = self._match(TokenType.NOT) 2901 2902 if self._match_set(self.RANGE_PARSERS): 2903 expression = self.RANGE_PARSERS[self._prev.token_type](self, this) 2904 if not expression: 2905 return this 2906 2907 this = expression 2908 elif self._match(TokenType.ISNULL): 2909 this = self.expression(exp.Is, this=this, expression=exp.Null()) 2910 2911 # Postgres supports ISNULL and NOTNULL for conditions. 
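# e.g. (illustrative): "x ISNULL" is normalized to x IS NULL, and "x NOTNULL"
# below to NOT (x IS NULL); see: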
2912 # https://blog.andreiavram.ro/postgresql-null-composite-type/ 2913 if self._match(TokenType.NOTNULL): 2914 this = self.expression(exp.Is, this=this, expression=exp.Null()) 2915 this = self.expression(exp.Not, this=this) 2916 2917 if negate: 2918 this = self.expression(exp.Not, this=this) 2919 2920 if self._match(TokenType.IS): 2921 this = self._parse_is(this) 2922 2923 return this 2924 2925 def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 2926 index = self._index - 1 2927 negate = self._match(TokenType.NOT) 2928 2929 if self._match_text_seq("DISTINCT", "FROM"): 2930 klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ 2931 return self.expression(klass, this=this, expression=self._parse_expression()) 2932 2933 expression = self._parse_null() or self._parse_boolean() 2934 if not expression: 2935 self._retreat(index) 2936 return None 2937 2938 this = self.expression(exp.Is, this=this, expression=expression) 2939 return self.expression(exp.Not, this=this) if negate else this 2940 2941 def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In: 2942 unnest = self._parse_unnest(with_alias=False) 2943 if unnest: 2944 this = self.expression(exp.In, this=this, unnest=unnest) 2945 elif self._match(TokenType.L_PAREN): 2946 expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias)) 2947 2948 if len(expressions) == 1 and isinstance(expressions[0], exp.Subqueryable): 2949 this = self.expression(exp.In, this=this, query=expressions[0]) 2950 else: 2951 this = self.expression(exp.In, this=this, expressions=expressions) 2952 2953 self._match_r_paren(this) 2954 else: 2955 this = self.expression(exp.In, this=this, field=self._parse_field()) 2956 2957 return this 2958 2959 def _parse_between(self, this: exp.Expression) -> exp.Between: 2960 low = self._parse_bitwise() 2961 self._match(TokenType.AND) 2962 high = self._parse_bitwise() 2963 return self.expression(exp.Between, this=this, low=low, high=high) 2964 2965 def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 2966 if not self._match(TokenType.ESCAPE): 2967 return this 2968 return self.expression(exp.Escape, this=this, expression=self._parse_string()) 2969 2970 def _parse_interval(self) -> t.Optional[exp.Interval]: 2971 if not self._match(TokenType.INTERVAL): 2972 return None 2973 2974 if self._match(TokenType.STRING, advance=False): 2975 this = self._parse_primary() 2976 else: 2977 this = self._parse_term() 2978 2979 unit = self._parse_function() or self._parse_var() 2980 2981 # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse 2982 # each INTERVAL expression into this canonical form so it's easy to transpile 2983 if this and this.is_number: 2984 this = exp.Literal.string(this.name) 2985 elif this and this.is_string: 2986 parts = this.name.split() 2987 2988 if len(parts) == 2: 2989 if unit: 2990 # this is not actually a unit, it's something else 2991 unit = None 2992 self._retreat(self._index - 1) 2993 else: 2994 this = exp.Literal.string(parts[0]) 2995 unit = self.expression(exp.Var, this=parts[1]) 2996 2997 return self.expression(exp.Interval, this=this, unit=unit) 2998 2999 def _parse_bitwise(self) -> t.Optional[exp.Expression]: 3000 this = self._parse_term() 3001 3002 while True: 3003 if self._match_set(self.BITWISE): 3004 this = self.expression( 3005 self.BITWISE[self._prev.token_type], this=this, expression=self._parse_term() 3006 ) 3007 elif self._match_pair(TokenType.LT, TokenType.LT): 3008 
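# "<<" reaches the parser as two consecutive LT tokens (likely kept separate
# by the tokenizer so nested type syntax like ARRAY<INT> stays parseable), so
# the pair is folded into an exp.BitwiseLeftShift here.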
this = self.expression( 3009 exp.BitwiseLeftShift, this=this, expression=self._parse_term() 3010 ) 3011 elif self._match_pair(TokenType.GT, TokenType.GT): 3012 this = self.expression( 3013 exp.BitwiseRightShift, this=this, expression=self._parse_term() 3014 ) 3015 else: 3016 break 3017 3018 return this 3019 3020 def _parse_term(self) -> t.Optional[exp.Expression]: 3021 return self._parse_tokens(self._parse_factor, self.TERM) 3022 3023 def _parse_factor(self) -> t.Optional[exp.Expression]: 3024 return self._parse_tokens(self._parse_unary, self.FACTOR) 3025 3026 def _parse_unary(self) -> t.Optional[exp.Expression]: 3027 if self._match_set(self.UNARY_PARSERS): 3028 return self.UNARY_PARSERS[self._prev.token_type](self) 3029 return self._parse_at_time_zone(self._parse_type()) 3030 3031 def _parse_type(self) -> t.Optional[exp.Expression]: 3032 interval = self._parse_interval() 3033 if interval: 3034 return interval 3035 3036 index = self._index 3037 data_type = self._parse_types(check_func=True) 3038 this = self._parse_column() 3039 3040 if data_type: 3041 if isinstance(this, exp.Literal): 3042 parser = self.TYPE_LITERAL_PARSERS.get(data_type.this) 3043 if parser: 3044 return parser(self, this, data_type) 3045 return self.expression(exp.Cast, this=this, to=data_type) 3046 if not data_type.expressions: 3047 self._retreat(index) 3048 return self._parse_column() 3049 return self._parse_column_ops(data_type) 3050 3051 return this 3052 3053 def _parse_type_size(self) -> t.Optional[exp.DataTypeSize]: 3054 this = self._parse_type() 3055 if not this: 3056 return None 3057 3058 return self.expression( 3059 exp.DataTypeSize, this=this, expression=self._parse_var(any_token=True) 3060 ) 3061 3062 def _parse_types( 3063 self, check_func: bool = False, schema: bool = False 3064 ) -> t.Optional[exp.Expression]: 3065 index = self._index 3066 3067 prefix = self._match_text_seq("SYSUDTLIB", ".") 3068 3069 if not self._match_set(self.TYPE_TOKENS): 3070 return None 3071 3072 type_token = self._prev.token_type 3073 3074 if type_token == TokenType.PSEUDO_TYPE: 3075 return self.expression(exp.PseudoType, this=self._prev.text) 3076 3077 nested = type_token in self.NESTED_TYPE_TOKENS 3078 is_struct = type_token == TokenType.STRUCT 3079 expressions = None 3080 maybe_func = False 3081 3082 if self._match(TokenType.L_PAREN): 3083 if is_struct: 3084 expressions = self._parse_csv(self._parse_struct_types) 3085 elif nested: 3086 expressions = self._parse_csv( 3087 lambda: self._parse_types(check_func=check_func, schema=schema) 3088 ) 3089 elif type_token in self.ENUM_TYPE_TOKENS: 3090 expressions = self._parse_csv(self._parse_primary) 3091 else: 3092 expressions = self._parse_csv(self._parse_type_size) 3093 3094 if not expressions or not self._match(TokenType.R_PAREN): 3095 self._retreat(index) 3096 return None 3097 3098 maybe_func = True 3099 3100 if self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET): 3101 this = exp.DataType( 3102 this=exp.DataType.Type.ARRAY, 3103 expressions=[ 3104 exp.DataType( 3105 this=exp.DataType.Type[type_token.value], 3106 expressions=expressions, 3107 nested=nested, 3108 ) 3109 ], 3110 nested=True, 3111 ) 3112 3113 while self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET): 3114 this = exp.DataType(this=exp.DataType.Type.ARRAY, expressions=[this], nested=True) 3115 3116 return this 3117 3118 if self._match(TokenType.L_BRACKET): 3119 self._retreat(index) 3120 return None 3121 3122 values: t.Optional[t.List[t.Optional[exp.Expression]]] = None 3123 if nested and 
self._match(TokenType.LT): 3124 if is_struct: 3125 expressions = self._parse_csv(self._parse_struct_types) 3126 else: 3127 expressions = self._parse_csv( 3128 lambda: self._parse_types(check_func=check_func, schema=schema) 3129 ) 3130 3131 if not self._match(TokenType.GT): 3132 self.raise_error("Expecting >") 3133 3134 if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)): 3135 values = self._parse_csv(self._parse_conjunction) 3136 self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN)) 3137 3138 value: t.Optional[exp.Expression] = None 3139 if type_token in self.TIMESTAMPS: 3140 if self._match_text_seq("WITH", "TIME", "ZONE"): 3141 maybe_func = False 3142 value = exp.DataType(this=exp.DataType.Type.TIMESTAMPTZ, expressions=expressions) 3143 elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"): 3144 maybe_func = False 3145 value = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions) 3146 elif self._match_text_seq("WITHOUT", "TIME", "ZONE"): 3147 maybe_func = False 3148 elif type_token == TokenType.INTERVAL: 3149 unit = self._parse_var() 3150 3151 if not unit: 3152 value = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL) 3153 else: 3154 value = self.expression(exp.Interval, unit=unit) 3155 3156 if maybe_func and check_func: 3157 index2 = self._index 3158 peek = self._parse_string() 3159 3160 if not peek: 3161 self._retreat(index) 3162 return None 3163 3164 self._retreat(index2) 3165 3166 if value: 3167 return value 3168 3169 return exp.DataType( 3170 this=exp.DataType.Type[type_token.value], 3171 expressions=expressions, 3172 nested=nested, 3173 values=values, 3174 prefix=prefix, 3175 ) 3176 3177 def _parse_struct_types(self) -> t.Optional[exp.Expression]: 3178 this = self._parse_type() or self._parse_id_var() 3179 self._match(TokenType.COLON) 3180 return self._parse_column_def(this) 3181 3182 def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3183 if not self._match_text_seq("AT", "TIME", "ZONE"): 3184 return this 3185 return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary()) 3186 3187 def _parse_column(self) -> t.Optional[exp.Expression]: 3188 this = self._parse_field() 3189 if isinstance(this, exp.Identifier): 3190 this = self.expression(exp.Column, this=this) 3191 elif not this: 3192 return self._parse_bracket(this) 3193 return self._parse_column_ops(this) 3194 3195 def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3196 this = self._parse_bracket(this) 3197 3198 while self._match_set(self.COLUMN_OPERATORS): 3199 op_token = self._prev.token_type 3200 op = self.COLUMN_OPERATORS.get(op_token) 3201 3202 if op_token == TokenType.DCOLON: 3203 field = self._parse_types() 3204 if not field: 3205 self.raise_error("Expected type") 3206 elif op and self._curr: 3207 self._advance() 3208 value = self._prev.text 3209 field = ( 3210 exp.Literal.number(value) 3211 if self._prev.token_type == TokenType.NUMBER 3212 else exp.Literal.string(value) 3213 ) 3214 else: 3215 field = self._parse_field(anonymous_func=True, any_token=True) 3216 3217 if isinstance(field, exp.Func): 3218 # bigquery allows function calls like x.y.count(...) 3219 # SAFE.SUBSTR(...) 
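# e.g. (illustrative): in "SELECT a.b.count(x)" the dotted prefix is folded
# into the function reference via _replace_columns_with_dots below; see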
3220 # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules 3221 this = self._replace_columns_with_dots(this) 3222 3223 if op: 3224 this = op(self, this, field) 3225 elif isinstance(this, exp.Column) and not this.args.get("catalog"): 3226 this = self.expression( 3227 exp.Column, 3228 this=field, 3229 table=this.this, 3230 db=this.args.get("table"), 3231 catalog=this.args.get("db"), 3232 ) 3233 else: 3234 this = self.expression(exp.Dot, this=this, expression=field) 3235 this = self._parse_bracket(this) 3236 return this 3237 3238 def _parse_primary(self) -> t.Optional[exp.Expression]: 3239 if self._match_set(self.PRIMARY_PARSERS): 3240 token_type = self._prev.token_type 3241 primary = self.PRIMARY_PARSERS[token_type](self, self._prev) 3242 3243 if token_type == TokenType.STRING: 3244 expressions = [primary] 3245 while self._match(TokenType.STRING): 3246 expressions.append(exp.Literal.string(self._prev.text)) 3247 3248 if len(expressions) > 1: 3249 return self.expression(exp.Concat, expressions=expressions) 3250 3251 return primary 3252 3253 if self._match_pair(TokenType.DOT, TokenType.NUMBER): 3254 return exp.Literal.number(f"0.{self._prev.text}") 3255 3256 if self._match(TokenType.L_PAREN): 3257 comments = self._prev_comments 3258 query = self._parse_select() 3259 3260 if query: 3261 expressions = [query] 3262 else: 3263 expressions = self._parse_expressions() 3264 3265 this = self._parse_query_modifiers(seq_get(expressions, 0)) 3266 3267 if isinstance(this, exp.Subqueryable): 3268 this = self._parse_set_operations( 3269 self._parse_subquery(this=this, parse_alias=False) 3270 ) 3271 elif len(expressions) > 1: 3272 this = self.expression(exp.Tuple, expressions=expressions) 3273 else: 3274 this = self.expression(exp.Paren, this=self._parse_set_operations(this)) 3275 3276 if this: 3277 this.add_comments(comments) 3278 3279 self._match_r_paren(expression=this) 3280 return this 3281 3282 return None 3283 3284 def _parse_field( 3285 self, 3286 any_token: bool = False, 3287 tokens: t.Optional[t.Collection[TokenType]] = None, 3288 anonymous_func: bool = False, 3289 ) -> t.Optional[exp.Expression]: 3290 return ( 3291 self._parse_primary() 3292 or self._parse_function(anonymous=anonymous_func) 3293 or self._parse_id_var(any_token=any_token, tokens=tokens) 3294 ) 3295 3296 def _parse_function( 3297 self, 3298 functions: t.Optional[t.Dict[str, t.Callable]] = None, 3299 anonymous: bool = False, 3300 optional_parens: bool = True, 3301 ) -> t.Optional[exp.Expression]: 3302 if not self._curr: 3303 return None 3304 3305 token_type = self._curr.token_type 3306 3307 if optional_parens and self._match_set(self.NO_PAREN_FUNCTION_PARSERS): 3308 return self.NO_PAREN_FUNCTION_PARSERS[token_type](self) 3309 3310 if not self._next or self._next.token_type != TokenType.L_PAREN: 3311 if optional_parens and token_type in self.NO_PAREN_FUNCTIONS: 3312 self._advance() 3313 return self.expression(self.NO_PAREN_FUNCTIONS[token_type]) 3314 3315 return None 3316 3317 if token_type not in self.FUNC_TOKENS: 3318 return None 3319 3320 this = self._curr.text 3321 upper = this.upper() 3322 self._advance(2) 3323 3324 parser = self.FUNCTION_PARSERS.get(upper) 3325 3326 if parser and not anonymous: 3327 this = parser(self) 3328 else: 3329 subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type) 3330 3331 if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH): 3332 this = self.expression(subquery_predicate, this=self._parse_select()) 3333 
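# e.g. (illustrative): "EXISTS (SELECT 1 FROM t)" takes this branch and wraps
# the parsed SELECT in exp.Exists before consuming the closing paren.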
self._match_r_paren() 3334 return this 3335 3336 if functions is None: 3337 functions = self.FUNCTIONS 3338 3339 function = functions.get(upper) 3340 3341 alias = upper in self.FUNCTIONS_WITH_ALIASED_ARGS 3342 args = self._parse_csv(lambda: self._parse_lambda(alias=alias)) 3343 3344 if function and not anonymous: 3345 this = self.validate_expression(function(args), args) 3346 else: 3347 this = self.expression(exp.Anonymous, this=this, expressions=args) 3348 3349 self._match(TokenType.R_PAREN, expression=this) 3350 return self._parse_window(this) 3351 3352 def _parse_function_parameter(self) -> t.Optional[exp.Expression]: 3353 return self._parse_column_def(self._parse_id_var()) 3354 3355 def _parse_user_defined_function( 3356 self, kind: t.Optional[TokenType] = None 3357 ) -> t.Optional[exp.Expression]: 3358 this = self._parse_id_var() 3359 3360 while self._match(TokenType.DOT): 3361 this = self.expression(exp.Dot, this=this, expression=self._parse_id_var()) 3362 3363 if not self._match(TokenType.L_PAREN): 3364 return this 3365 3366 expressions = self._parse_csv(self._parse_function_parameter) 3367 self._match_r_paren() 3368 return self.expression( 3369 exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True 3370 ) 3371 3372 def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier: 3373 literal = self._parse_primary() 3374 if literal: 3375 return self.expression(exp.Introducer, this=token.text, expression=literal) 3376 3377 return self.expression(exp.Identifier, this=token.text) 3378 3379 def _parse_session_parameter(self) -> exp.SessionParameter: 3380 kind = None 3381 this = self._parse_id_var() or self._parse_primary() 3382 3383 if this and self._match(TokenType.DOT): 3384 kind = this.name 3385 this = self._parse_var() or self._parse_primary() 3386 3387 return self.expression(exp.SessionParameter, this=this, kind=kind) 3388 3389 def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]: 3390 index = self._index 3391 3392 if self._match(TokenType.L_PAREN): 3393 expressions = self._parse_csv(self._parse_id_var) 3394 3395 if not self._match(TokenType.R_PAREN): 3396 self._retreat(index) 3397 else: 3398 expressions = [self._parse_id_var()] 3399 3400 if self._match_set(self.LAMBDAS): 3401 return self.LAMBDAS[self._prev.token_type](self, expressions) 3402 3403 self._retreat(index) 3404 3405 this: t.Optional[exp.Expression] 3406 3407 if self._match(TokenType.DISTINCT): 3408 this = self.expression( 3409 exp.Distinct, expressions=self._parse_csv(self._parse_conjunction) 3410 ) 3411 else: 3412 this = self._parse_select_or_expression(alias=alias) 3413 3414 return self._parse_limit(self._parse_order(self._parse_respect_or_ignore_nulls(this))) 3415 3416 def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 3417 index = self._index 3418 3419 if not self.errors: 3420 try: 3421 if self._parse_select(nested=True): 3422 return this 3423 except ParseError: 3424 pass 3425 finally: 3426 self.errors.clear() 3427 self._retreat(index) 3428 3429 if not self._match(TokenType.L_PAREN): 3430 return this 3431 3432 args = self._parse_csv( 3433 lambda: self._parse_constraint() 3434 or self._parse_column_def(self._parse_field(any_token=True)) 3435 ) 3436 3437 self._match_r_paren() 3438 return self.expression(exp.Schema, this=this, expressions=args) 3439 3440 def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3441 # column defs are not really columns, they're identifiers 3442 if 
isinstance(this, exp.Column): 3443 this = this.this 3444 3445 kind = self._parse_types(schema=True) 3446 3447 if self._match_text_seq("FOR", "ORDINALITY"): 3448 return self.expression(exp.ColumnDef, this=this, ordinality=True) 3449 3450 constraints = [] 3451 while True: 3452 constraint = self._parse_column_constraint() 3453 if not constraint: 3454 break 3455 constraints.append(constraint) 3456 3457 if not kind and not constraints: 3458 return this 3459 3460 return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints) 3461 3462 def _parse_auto_increment( 3463 self, 3464 ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint: 3465 start = None 3466 increment = None 3467 3468 if self._match(TokenType.L_PAREN, advance=False): 3469 args = self._parse_wrapped_csv(self._parse_bitwise) 3470 start = seq_get(args, 0) 3471 increment = seq_get(args, 1) 3472 elif self._match_text_seq("START"): 3473 start = self._parse_bitwise() 3474 self._match_text_seq("INCREMENT") 3475 increment = self._parse_bitwise() 3476 3477 if start and increment: 3478 return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment) 3479 3480 return exp.AutoIncrementColumnConstraint() 3481 3482 def _parse_compress(self) -> exp.CompressColumnConstraint: 3483 if self._match(TokenType.L_PAREN, advance=False): 3484 return self.expression( 3485 exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise) 3486 ) 3487 3488 return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise()) 3489 3490 def _parse_generated_as_identity(self) -> exp.GeneratedAsIdentityColumnConstraint: 3491 if self._match_text_seq("BY", "DEFAULT"): 3492 on_null = self._match_pair(TokenType.ON, TokenType.NULL) 3493 this = self.expression( 3494 exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null 3495 ) 3496 else: 3497 self._match_text_seq("ALWAYS") 3498 this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True) 3499 3500 self._match(TokenType.ALIAS) 3501 identity = self._match_text_seq("IDENTITY") 3502 3503 if self._match(TokenType.L_PAREN): 3504 if self._match_text_seq("START", "WITH"): 3505 this.set("start", self._parse_bitwise()) 3506 if self._match_text_seq("INCREMENT", "BY"): 3507 this.set("increment", self._parse_bitwise()) 3508 if self._match_text_seq("MINVALUE"): 3509 this.set("minvalue", self._parse_bitwise()) 3510 if self._match_text_seq("MAXVALUE"): 3511 this.set("maxvalue", self._parse_bitwise()) 3512 3513 if self._match_text_seq("CYCLE"): 3514 this.set("cycle", True) 3515 elif self._match_text_seq("NO", "CYCLE"): 3516 this.set("cycle", False) 3517 3518 if not identity: 3519 this.set("expression", self._parse_bitwise()) 3520 3521 self._match_r_paren() 3522 3523 return this 3524 3525 def _parse_inline(self) -> exp.InlineLengthColumnConstraint: 3526 self._match_text_seq("LENGTH") 3527 return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise()) 3528 3529 def _parse_not_constraint( 3530 self, 3531 ) -> t.Optional[exp.NotNullColumnConstraint | exp.CaseSpecificColumnConstraint]: 3532 if self._match_text_seq("NULL"): 3533 return self.expression(exp.NotNullColumnConstraint) 3534 if self._match_text_seq("CASESPECIFIC"): 3535 return self.expression(exp.CaseSpecificColumnConstraint, not_=True) 3536 return None 3537 3538 def _parse_column_constraint(self) -> t.Optional[exp.Expression]: 3539 if self._match(TokenType.CONSTRAINT): 3540 this = self._parse_id_var() 3541 else: 3542 this = None 3543 3544 if 
self._match_texts(self.CONSTRAINT_PARSERS): 3545 return self.expression( 3546 exp.ColumnConstraint, 3547 this=this, 3548 kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self), 3549 ) 3550 3551 return this 3552 3553 def _parse_constraint(self) -> t.Optional[exp.Expression]: 3554 if not self._match(TokenType.CONSTRAINT): 3555 return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS) 3556 3557 this = self._parse_id_var() 3558 expressions = [] 3559 3560 while True: 3561 constraint = self._parse_unnamed_constraint() or self._parse_function() 3562 if not constraint: 3563 break 3564 expressions.append(constraint) 3565 3566 return self.expression(exp.Constraint, this=this, expressions=expressions) 3567 3568 def _parse_unnamed_constraint( 3569 self, constraints: t.Optional[t.Collection[str]] = None 3570 ) -> t.Optional[exp.Expression]: 3571 if not self._match_texts(constraints or self.CONSTRAINT_PARSERS): 3572 return None 3573 3574 constraint = self._prev.text.upper() 3575 if constraint not in self.CONSTRAINT_PARSERS: 3576 self.raise_error(f"No parser found for schema constraint {constraint}.") 3577 3578 return self.CONSTRAINT_PARSERS[constraint](self) 3579 3580 def _parse_unique(self) -> exp.UniqueColumnConstraint: 3581 self._match_text_seq("KEY") 3582 return self.expression( 3583 exp.UniqueColumnConstraint, this=self._parse_schema(self._parse_id_var(any_token=False)) 3584 ) 3585 3586 def _parse_key_constraint_options(self) -> t.List[str]: 3587 options = [] 3588 while True: 3589 if not self._curr: 3590 break 3591 3592 if self._match(TokenType.ON): 3593 action = None 3594 on = self._advance_any() and self._prev.text 3595 3596 if self._match_text_seq("NO", "ACTION"): 3597 action = "NO ACTION" 3598 elif self._match_text_seq("CASCADE"): 3599 action = "CASCADE" 3600 elif self._match_pair(TokenType.SET, TokenType.NULL): 3601 action = "SET NULL" 3602 elif self._match_pair(TokenType.SET, TokenType.DEFAULT): 3603 action = "SET DEFAULT" 3604 else: 3605 self.raise_error("Invalid key constraint") 3606 3607 options.append(f"ON {on} {action}") 3608 elif self._match_text_seq("NOT", "ENFORCED"): 3609 options.append("NOT ENFORCED") 3610 elif self._match_text_seq("DEFERRABLE"): 3611 options.append("DEFERRABLE") 3612 elif self._match_text_seq("INITIALLY", "DEFERRED"): 3613 options.append("INITIALLY DEFERRED") 3614 elif self._match_text_seq("NORELY"): 3615 options.append("NORELY") 3616 elif self._match_text_seq("MATCH", "FULL"): 3617 options.append("MATCH FULL") 3618 else: 3619 break 3620 3621 return options 3622 3623 def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]: 3624 if match and not self._match(TokenType.REFERENCES): 3625 return None 3626 3627 expressions = None 3628 this = self._parse_table(schema=True) 3629 options = self._parse_key_constraint_options() 3630 return self.expression(exp.Reference, this=this, expressions=expressions, options=options) 3631 3632 def _parse_foreign_key(self) -> exp.ForeignKey: 3633 expressions = self._parse_wrapped_id_vars() 3634 reference = self._parse_references() 3635 options = {} 3636 3637 while self._match(TokenType.ON): 3638 if not self._match_set((TokenType.DELETE, TokenType.UPDATE)): 3639 self.raise_error("Expected DELETE or UPDATE") 3640 3641 kind = self._prev.text.lower() 3642 3643 if self._match_text_seq("NO", "ACTION"): 3644 action = "NO ACTION" 3645 elif self._match(TokenType.SET): 3646 self._match_set((TokenType.NULL, TokenType.DEFAULT)) 3647 action = "SET " + self._prev.text.upper() 3648 else: 3649 
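# Fall through for single-token actions such as CASCADE or RESTRICT: consume
# the token and record its text verbatim.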
self._advance() 3650 action = self._prev.text.upper() 3651 3652 options[kind] = action 3653 3654 return self.expression( 3655 exp.ForeignKey, expressions=expressions, reference=reference, **options # type: ignore 3656 ) 3657 3658 def _parse_primary_key( 3659 self, wrapped_optional: bool = False, in_props: bool = False 3660 ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey: 3661 desc = ( 3662 self._match_set((TokenType.ASC, TokenType.DESC)) 3663 and self._prev.token_type == TokenType.DESC 3664 ) 3665 3666 if not in_props and not self._match(TokenType.L_PAREN, advance=False): 3667 return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc) 3668 3669 expressions = self._parse_wrapped_csv(self._parse_field, optional=wrapped_optional) 3670 options = self._parse_key_constraint_options() 3671 return self.expression(exp.PrimaryKey, expressions=expressions, options=options) 3672 3673 def _parse_bracket(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3674 if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)): 3675 return this 3676 3677 bracket_kind = self._prev.token_type 3678 3679 if self._match(TokenType.COLON): 3680 expressions: t.List[t.Optional[exp.Expression]] = [ 3681 self.expression(exp.Slice, expression=self._parse_conjunction()) 3682 ] 3683 else: 3684 expressions = self._parse_csv(lambda: self._parse_slice(self._parse_conjunction())) 3685 3686 # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs 3687 if bracket_kind == TokenType.L_BRACE: 3688 this = self.expression(exp.Struct, expressions=expressions) 3689 elif not this or this.name.upper() == "ARRAY": 3690 this = self.expression(exp.Array, expressions=expressions) 3691 else: 3692 expressions = apply_index_offset(this, expressions, -self.INDEX_OFFSET) 3693 this = self.expression(exp.Bracket, this=this, expressions=expressions) 3694 3695 if not self._match(TokenType.R_BRACKET) and bracket_kind == TokenType.L_BRACKET: 3696 self.raise_error("Expected ]") 3697 elif not self._match(TokenType.R_BRACE) and bracket_kind == TokenType.L_BRACE: 3698 self.raise_error("Expected }") 3699 3700 self._add_comments(this) 3701 return self._parse_bracket(this) 3702 3703 def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3704 if self._match(TokenType.COLON): 3705 return self.expression(exp.Slice, this=this, expression=self._parse_conjunction()) 3706 return this 3707 3708 def _parse_case(self) -> t.Optional[exp.Expression]: 3709 ifs = [] 3710 default = None 3711 3712 expression = self._parse_conjunction() 3713 3714 while self._match(TokenType.WHEN): 3715 this = self._parse_conjunction() 3716 self._match(TokenType.THEN) 3717 then = self._parse_conjunction() 3718 ifs.append(self.expression(exp.If, this=this, true=then)) 3719 3720 if self._match(TokenType.ELSE): 3721 default = self._parse_conjunction() 3722 3723 if not self._match(TokenType.END): 3724 self.raise_error("Expected END after CASE", self._prev) 3725 3726 return self._parse_window( 3727 self.expression(exp.Case, this=expression, ifs=ifs, default=default) 3728 ) 3729 3730 def _parse_if(self) -> t.Optional[exp.Expression]: 3731 if self._match(TokenType.L_PAREN): 3732 args = self._parse_csv(self._parse_conjunction) 3733 this = self.validate_expression(exp.If.from_arg_list(args), args) 3734 self._match_r_paren() 3735 else: 3736 index = self._index - 1 3737 condition = self._parse_conjunction() 3738 3739 if not condition: 3740 self._retreat(index) 3741 return None 3742 3743 self._match(TokenType.THEN) 3744 true = 
self._parse_conjunction() 3745 false = self._parse_conjunction() if self._match(TokenType.ELSE) else None 3746 self._match(TokenType.END) 3747 this = self.expression(exp.If, this=condition, true=true, false=false) 3748 3749 return self._parse_window(this) 3750 3751 def _parse_extract(self) -> exp.Extract: 3752 this = self._parse_function() or self._parse_var() or self._parse_type() 3753 3754 if self._match(TokenType.FROM): 3755 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 3756 3757 if not self._match(TokenType.COMMA): 3758 self.raise_error("Expected FROM or comma after EXTRACT", self._prev) 3759 3760 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 3761 3762 def _parse_any_value(self) -> exp.AnyValue: 3763 this = self._parse_lambda() 3764 is_max = None 3765 having = None 3766 3767 if self._match(TokenType.HAVING): 3768 self._match_texts(("MAX", "MIN")) 3769 is_max = self._prev.text == "MAX" 3770 having = self._parse_column() 3771 3772 return self.expression(exp.AnyValue, this=this, having=having, max=is_max) 3773 3774 def _parse_cast(self, strict: bool) -> exp.Expression: 3775 this = self._parse_conjunction() 3776 3777 if not self._match(TokenType.ALIAS): 3778 if self._match(TokenType.COMMA): 3779 return self.expression( 3780 exp.CastToStrType, this=this, expression=self._parse_string() 3781 ) 3782 else: 3783 self.raise_error("Expected AS after CAST") 3784 3785 fmt = None 3786 to = self._parse_types() 3787 3788 if not to: 3789 self.raise_error("Expected TYPE after CAST") 3790 elif to.this == exp.DataType.Type.CHAR: 3791 if self._match(TokenType.CHARACTER_SET): 3792 to = self.expression(exp.CharacterSet, this=self._parse_var_or_string()) 3793 elif self._match(TokenType.FORMAT): 3794 fmt_string = self._parse_string() 3795 fmt = self._parse_at_time_zone(fmt_string) 3796 3797 if to.this in exp.DataType.TEMPORAL_TYPES: 3798 this = self.expression( 3799 exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime, 3800 this=this, 3801 format=exp.Literal.string( 3802 format_time( 3803 fmt_string.this if fmt_string else "", 3804 self.FORMAT_MAPPING or self.TIME_MAPPING, 3805 self.FORMAT_TRIE or self.TIME_TRIE, 3806 ) 3807 ), 3808 ) 3809 3810 if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime): 3811 this.set("zone", fmt.args["zone"]) 3812 3813 return this 3814 3815 return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, format=fmt) 3816 3817 def _parse_concat(self) -> t.Optional[exp.Expression]: 3818 args = self._parse_csv(self._parse_conjunction) 3819 if self.CONCAT_NULL_OUTPUTS_STRING: 3820 args = [ 3821 exp.func("COALESCE", exp.cast(arg, "text"), exp.Literal.string("")) 3822 for arg in args 3823 if arg 3824 ] 3825 3826 # Some dialects (e.g. Trino) don't allow a single-argument CONCAT call, so when 3827 # we find such a call we replace it with its argument. 
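# e.g. (illustrative): CONCAT(x) becomes just x (already COALESCE-wrapped
# above when CONCAT_NULL_OUTPUTS_STRING is set).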
3828 if len(args) == 1: 3829 return args[0] 3830 3831 return self.expression( 3832 exp.Concat if self.STRICT_STRING_CONCAT else exp.SafeConcat, expressions=args 3833 ) 3834 3835 def _parse_string_agg(self) -> exp.Expression: 3836 if self._match(TokenType.DISTINCT): 3837 args: t.List[t.Optional[exp.Expression]] = [ 3838 self.expression(exp.Distinct, expressions=[self._parse_conjunction()]) 3839 ] 3840 if self._match(TokenType.COMMA): 3841 args.extend(self._parse_csv(self._parse_conjunction)) 3842 else: 3843 args = self._parse_csv(self._parse_conjunction) 3844 3845 index = self._index 3846 if not self._match(TokenType.R_PAREN): 3847 # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]]) 3848 return self.expression( 3849 exp.GroupConcat, 3850 this=seq_get(args, 0), 3851 separator=self._parse_order(this=seq_get(args, 1)), 3852 ) 3853 3854 # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]). 3855 # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that 3856 # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them. 3857 if not self._match_text_seq("WITHIN", "GROUP"): 3858 self._retreat(index) 3859 return self.validate_expression(exp.GroupConcat.from_arg_list(args), args) 3860 3861 self._match_l_paren() # The corresponding match_r_paren will be called in parse_function (caller) 3862 order = self._parse_order(this=seq_get(args, 0)) 3863 return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1)) 3864 3865 def _parse_convert(self, strict: bool) -> t.Optional[exp.Expression]: 3866 this = self._parse_bitwise() 3867 3868 if self._match(TokenType.USING): 3869 to: t.Optional[exp.Expression] = self.expression( 3870 exp.CharacterSet, this=self._parse_var() 3871 ) 3872 elif self._match(TokenType.COMMA): 3873 to = self._parse_types() 3874 else: 3875 to = None 3876 3877 return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to) 3878 3879 def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]: 3880 """ 3881 There are generally two variants of the DECODE function: 3882 3883 - DECODE(bin, charset) 3884 - DECODE(expression, search, result [, search, result] ... [, default]) 3885 3886 The second variant will always be parsed into a CASE expression. Note that NULL 3887 needs special treatment, since we need to explicitly check for it with `IS NULL`, 3888 instead of relying on pattern matching. 
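For example (illustrative): DECODE(x, 1, 'one', 'other') becomes
CASE WHEN x = 1 THEN 'one' ELSE 'other' END, and a NULL search value yields a
WHEN x IS NULL branch instead of an equality comparison.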
3889 """ 3890 args = self._parse_csv(self._parse_conjunction) 3891 3892 if len(args) < 3: 3893 return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1)) 3894 3895 expression, *expressions = args 3896 if not expression: 3897 return None 3898 3899 ifs = [] 3900 for search, result in zip(expressions[::2], expressions[1::2]): 3901 if not search or not result: 3902 return None 3903 3904 if isinstance(search, exp.Literal): 3905 ifs.append( 3906 exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result) 3907 ) 3908 elif isinstance(search, exp.Null): 3909 ifs.append( 3910 exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result) 3911 ) 3912 else: 3913 cond = exp.or_( 3914 exp.EQ(this=expression.copy(), expression=search), 3915 exp.and_( 3916 exp.Is(this=expression.copy(), expression=exp.Null()), 3917 exp.Is(this=search.copy(), expression=exp.Null()), 3918 copy=False, 3919 ), 3920 copy=False, 3921 ) 3922 ifs.append(exp.If(this=cond, true=result)) 3923 3924 return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None) 3925 3926 def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]: 3927 self._match_text_seq("KEY") 3928 key = self._parse_field() 3929 self._match(TokenType.COLON) 3930 self._match_text_seq("VALUE") 3931 value = self._parse_field() 3932 3933 if not key and not value: 3934 return None 3935 return self.expression(exp.JSONKeyValue, this=key, expression=value) 3936 3937 def _parse_json_object(self) -> exp.JSONObject: 3938 star = self._parse_star() 3939 expressions = [star] if star else self._parse_csv(self._parse_json_key_value) 3940 3941 null_handling = None 3942 if self._match_text_seq("NULL", "ON", "NULL"): 3943 null_handling = "NULL ON NULL" 3944 elif self._match_text_seq("ABSENT", "ON", "NULL"): 3945 null_handling = "ABSENT ON NULL" 3946 3947 unique_keys = None 3948 if self._match_text_seq("WITH", "UNIQUE"): 3949 unique_keys = True 3950 elif self._match_text_seq("WITHOUT", "UNIQUE"): 3951 unique_keys = False 3952 3953 self._match_text_seq("KEYS") 3954 3955 return_type = self._match_text_seq("RETURNING") and self._parse_type() 3956 format_json = self._match_text_seq("FORMAT", "JSON") 3957 encoding = self._match_text_seq("ENCODING") and self._parse_var() 3958 3959 return self.expression( 3960 exp.JSONObject, 3961 expressions=expressions, 3962 null_handling=null_handling, 3963 unique_keys=unique_keys, 3964 return_type=return_type, 3965 format_json=format_json, 3966 encoding=encoding, 3967 ) 3968 3969 def _parse_logarithm(self) -> exp.Func: 3970 # Default argument order is base, expression 3971 args = self._parse_csv(self._parse_range) 3972 3973 if len(args) > 1: 3974 if not self.LOG_BASE_FIRST: 3975 args.reverse() 3976 return exp.Log.from_arg_list(args) 3977 3978 return self.expression( 3979 exp.Ln if self.LOG_DEFAULTS_TO_LN else exp.Log, this=seq_get(args, 0) 3980 ) 3981 3982 def _parse_match_against(self) -> exp.MatchAgainst: 3983 expressions = self._parse_csv(self._parse_column) 3984 3985 self._match_text_seq(")", "AGAINST", "(") 3986 3987 this = self._parse_string() 3988 3989 if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"): 3990 modifier = "IN NATURAL LANGUAGE MODE" 3991 if self._match_text_seq("WITH", "QUERY", "EXPANSION"): 3992 modifier = f"{modifier} WITH QUERY EXPANSION" 3993 elif self._match_text_seq("IN", "BOOLEAN", "MODE"): 3994 modifier = "IN BOOLEAN MODE" 3995 elif self._match_text_seq("WITH", "QUERY", "EXPANSION"): 3996 modifier = "WITH QUERY EXPANSION" 3997 
else: 3998 modifier = None 3999 4000 return self.expression( 4001 exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier 4002 ) 4003 4004 # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16 4005 def _parse_open_json(self) -> exp.OpenJSON: 4006 this = self._parse_bitwise() 4007 path = self._match(TokenType.COMMA) and self._parse_string() 4008 4009 def _parse_open_json_column_def() -> exp.OpenJSONColumnDef: 4010 this = self._parse_field(any_token=True) 4011 kind = self._parse_types() 4012 path = self._parse_string() 4013 as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON) 4014 4015 return self.expression( 4016 exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json 4017 ) 4018 4019 expressions = None 4020 if self._match_pair(TokenType.R_PAREN, TokenType.WITH): 4021 self._match_l_paren() 4022 expressions = self._parse_csv(_parse_open_json_column_def) 4023 4024 return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions) 4025 4026 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 4027 args = self._parse_csv(self._parse_bitwise) 4028 4029 if self._match(TokenType.IN): 4030 return self.expression( 4031 exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0) 4032 ) 4033 4034 if haystack_first: 4035 haystack = seq_get(args, 0) 4036 needle = seq_get(args, 1) 4037 else: 4038 needle = seq_get(args, 0) 4039 haystack = seq_get(args, 1) 4040 4041 return self.expression( 4042 exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2) 4043 ) 4044 4045 def _parse_join_hint(self, func_name: str) -> exp.JoinHint: 4046 args = self._parse_csv(self._parse_table) 4047 return exp.JoinHint(this=func_name.upper(), expressions=args) 4048 4049 def _parse_substring(self) -> exp.Substring: 4050 # Postgres supports the form: substring(string [from int] [for int]) 4051 # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6 4052 4053 args = self._parse_csv(self._parse_bitwise) 4054 4055 if self._match(TokenType.FROM): 4056 args.append(self._parse_bitwise()) 4057 if self._match(TokenType.FOR): 4058 args.append(self._parse_bitwise()) 4059 4060 return self.validate_expression(exp.Substring.from_arg_list(args), args) 4061 4062 def _parse_trim(self) -> exp.Trim: 4063 # https://www.w3resource.com/sql/character-functions/trim.php 4064 # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html 4065 4066 position = None 4067 collation = None 4068 4069 if self._match_texts(self.TRIM_TYPES): 4070 position = self._prev.text.upper() 4071 4072 expression = self._parse_bitwise() 4073 if self._match_set((TokenType.FROM, TokenType.COMMA)): 4074 this = self._parse_bitwise() 4075 else: 4076 this = expression 4077 expression = None 4078 4079 if self._match(TokenType.COLLATE): 4080 collation = self._parse_bitwise() 4081 4082 return self.expression( 4083 exp.Trim, this=this, position=position, expression=expression, collation=collation 4084 ) 4085 4086 def _parse_window_clause(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]: 4087 return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window) 4088 4089 def _parse_named_window(self) -> t.Optional[exp.Expression]: 4090 return self._parse_window(self._parse_id_var(), alias=True) 4091 4092 def _parse_respect_or_ignore_nulls( 4093 self, this: t.Optional[exp.Expression] 4094 ) -> t.Optional[exp.Expression]: 4095 if self._match_text_seq("IGNORE", "NULLS"): 4096 return 
self.expression(exp.IgnoreNulls, this=this) 4097 if self._match_text_seq("RESPECT", "NULLS"): 4098 return self.expression(exp.RespectNulls, this=this) 4099 return this 4100 4101 def _parse_window( 4102 self, this: t.Optional[exp.Expression], alias: bool = False 4103 ) -> t.Optional[exp.Expression]: 4104 if self._match_pair(TokenType.FILTER, TokenType.L_PAREN): 4105 self._match(TokenType.WHERE) 4106 this = self.expression( 4107 exp.Filter, this=this, expression=self._parse_where(skip_where_token=True) 4108 ) 4109 self._match_r_paren() 4110 4111 # T-SQL allows the OVER (...) syntax after WITHIN GROUP. 4112 # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16 4113 if self._match_text_seq("WITHIN", "GROUP"): 4114 order = self._parse_wrapped(self._parse_order) 4115 this = self.expression(exp.WithinGroup, this=this, expression=order) 4116 4117 # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER 4118 # Some dialects choose to implement and some do not. 4119 # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html 4120 4121 # There is some code above in _parse_lambda that handles 4122 # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ... 4123 4124 # The below changes handle 4125 # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ... 4126 4127 # Oracle allows both formats 4128 # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html) 4129 # and Snowflake chose to do the same for familiarity 4130 # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes 4131 this = self._parse_respect_or_ignore_nulls(this) 4132 4133 # bigquery select from window x AS (partition by ...) 4134 if alias: 4135 over = None 4136 self._match(TokenType.ALIAS) 4137 elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS): 4138 return this 4139 else: 4140 over = self._prev.text.upper() 4141 4142 if not self._match(TokenType.L_PAREN): 4143 return self.expression( 4144 exp.Window, this=this, alias=self._parse_id_var(False), over=over 4145 ) 4146 4147 window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS) 4148 4149 first = self._match(TokenType.FIRST) 4150 if self._match_text_seq("LAST"): 4151 first = False 4152 4153 partition = self._parse_partition_by() 4154 order = self._parse_order() 4155 kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text 4156 4157 if kind: 4158 self._match(TokenType.BETWEEN) 4159 start = self._parse_window_spec() 4160 self._match(TokenType.AND) 4161 end = self._parse_window_spec() 4162 4163 spec = self.expression( 4164 exp.WindowSpec, 4165 kind=kind, 4166 start=start["value"], 4167 start_side=start["side"], 4168 end=end["value"], 4169 end_side=end["side"], 4170 ) 4171 else: 4172 spec = None 4173 4174 self._match_r_paren() 4175 4176 return self.expression( 4177 exp.Window, 4178 this=this, 4179 partition_by=partition, 4180 order=order, 4181 spec=spec, 4182 alias=window_alias, 4183 over=over, 4184 first=first, 4185 ) 4186 4187 def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]: 4188 self._match(TokenType.BETWEEN) 4189 4190 return { 4191 "value": ( 4192 (self._match_text_seq("UNBOUNDED") and "UNBOUNDED") 4193 or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW") 4194 or self._parse_bitwise() 4195 ), 4196 "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text, 4197 } 4198 4199 def _parse_alias( 4200 self, this: t.Optional[exp.Expression], 
explicit: bool = False 4201 ) -> t.Optional[exp.Expression]: 4202 any_token = self._match(TokenType.ALIAS) 4203 4204 if explicit and not any_token: 4205 return this 4206 4207 if self._match(TokenType.L_PAREN): 4208 aliases = self.expression( 4209 exp.Aliases, 4210 this=this, 4211 expressions=self._parse_csv(lambda: self._parse_id_var(any_token)), 4212 ) 4213 self._match_r_paren(aliases) 4214 return aliases 4215 4216 alias = self._parse_id_var(any_token) 4217 4218 if alias: 4219 return self.expression(exp.Alias, this=this, alias=alias) 4220 4221 return this 4222 4223 def _parse_id_var( 4224 self, 4225 any_token: bool = True, 4226 tokens: t.Optional[t.Collection[TokenType]] = None, 4227 ) -> t.Optional[exp.Expression]: 4228 identifier = self._parse_identifier() 4229 4230 if identifier: 4231 return identifier 4232 4233 if (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS): 4234 quoted = self._prev.token_type == TokenType.STRING 4235 return exp.Identifier(this=self._prev.text, quoted=quoted) 4236 4237 return None 4238 4239 def _parse_string(self) -> t.Optional[exp.Expression]: 4240 if self._match(TokenType.STRING): 4241 return self.PRIMARY_PARSERS[TokenType.STRING](self, self._prev) 4242 return self._parse_placeholder() 4243 4244 def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]: 4245 return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True) 4246 4247 def _parse_number(self) -> t.Optional[exp.Expression]: 4248 if self._match(TokenType.NUMBER): 4249 return self.PRIMARY_PARSERS[TokenType.NUMBER](self, self._prev) 4250 return self._parse_placeholder() 4251 4252 def _parse_identifier(self) -> t.Optional[exp.Expression]: 4253 if self._match(TokenType.IDENTIFIER): 4254 return self.expression(exp.Identifier, this=self._prev.text, quoted=True) 4255 return self._parse_placeholder() 4256 4257 def _parse_var( 4258 self, any_token: bool = False, tokens: t.Optional[t.Collection[TokenType]] = None 4259 ) -> t.Optional[exp.Expression]: 4260 if ( 4261 (any_token and self._advance_any()) 4262 or self._match(TokenType.VAR) 4263 or (self._match_set(tokens) if tokens else False) 4264 ): 4265 return self.expression(exp.Var, this=self._prev.text) 4266 return self._parse_placeholder() 4267 4268 def _advance_any(self) -> t.Optional[Token]: 4269 if self._curr and self._curr.token_type not in self.RESERVED_KEYWORDS: 4270 self._advance() 4271 return self._prev 4272 return None 4273 4274 def _parse_var_or_string(self) -> t.Optional[exp.Expression]: 4275 return self._parse_var() or self._parse_string() 4276 4277 def _parse_null(self) -> t.Optional[exp.Expression]: 4278 if self._match(TokenType.NULL): 4279 return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev) 4280 return None 4281 4282 def _parse_boolean(self) -> t.Optional[exp.Expression]: 4283 if self._match(TokenType.TRUE): 4284 return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev) 4285 if self._match(TokenType.FALSE): 4286 return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev) 4287 return None 4288 4289 def _parse_star(self) -> t.Optional[exp.Expression]: 4290 if self._match(TokenType.STAR): 4291 return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev) 4292 return None 4293 4294 def _parse_parameter(self) -> exp.Parameter: 4295 wrapped = self._match(TokenType.L_BRACE) 4296 this = self._parse_var() or self._parse_identifier() or self._parse_primary() 4297 self._match(TokenType.R_BRACE) 4298 return self.expression(exp.Parameter, this=this, wrapped=wrapped) 4299 
4300 def _parse_placeholder(self) -> t.Optional[exp.Expression]: 4301 if self._match_set(self.PLACEHOLDER_PARSERS): 4302 placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self) 4303 if placeholder: 4304 return placeholder 4305 self._advance(-1) 4306 return None 4307 4308 def _parse_except(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]: 4309 if not self._match(TokenType.EXCEPT): 4310 return None 4311 if self._match(TokenType.L_PAREN, advance=False): 4312 return self._parse_wrapped_csv(self._parse_column) 4313 return self._parse_csv(self._parse_column) 4314 4315 def _parse_replace(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]: 4316 if not self._match(TokenType.REPLACE): 4317 return None 4318 if self._match(TokenType.L_PAREN, advance=False): 4319 return self._parse_wrapped_csv(self._parse_expression) 4320 return self._parse_expressions() 4321 4322 def _parse_csv( 4323 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA 4324 ) -> t.List[t.Optional[exp.Expression]]: 4325 parse_result = parse_method() 4326 items = [parse_result] if parse_result is not None else [] 4327 4328 while self._match(sep): 4329 self._add_comments(parse_result) 4330 parse_result = parse_method() 4331 if parse_result is not None: 4332 items.append(parse_result) 4333 4334 return items 4335 4336 def _parse_tokens( 4337 self, parse_method: t.Callable, expressions: t.Dict 4338 ) -> t.Optional[exp.Expression]: 4339 this = parse_method() 4340 4341 while self._match_set(expressions): 4342 this = self.expression( 4343 expressions[self._prev.token_type], 4344 this=this, 4345 comments=self._prev_comments, 4346 expression=parse_method(), 4347 ) 4348 4349 return this 4350 4351 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[t.Optional[exp.Expression]]: 4352 return self._parse_wrapped_csv(self._parse_id_var, optional=optional) 4353 4354 def _parse_wrapped_csv( 4355 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False 4356 ) -> t.List[t.Optional[exp.Expression]]: 4357 return self._parse_wrapped( 4358 lambda: self._parse_csv(parse_method, sep=sep), optional=optional 4359 ) 4360 4361 def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any: 4362 wrapped = self._match(TokenType.L_PAREN) 4363 if not wrapped and not optional: 4364 self.raise_error("Expecting (") 4365 parse_result = parse_method() 4366 if wrapped: 4367 self._match_r_paren() 4368 return parse_result 4369 4370 def _parse_expressions(self) -> t.List[t.Optional[exp.Expression]]: 4371 return self._parse_csv(self._parse_expression) 4372 4373 def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]: 4374 return self._parse_select() or self._parse_set_operations( 4375 self._parse_expression() if alias else self._parse_conjunction() 4376 ) 4377 4378 def _parse_ddl_select(self) -> t.Optional[exp.Expression]: 4379 return self._parse_query_modifiers( 4380 self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False)) 4381 ) 4382 4383 def _parse_transaction(self) -> exp.Transaction | exp.Command: 4384 this = None 4385 if self._match_texts(self.TRANSACTION_KIND): 4386 this = self._prev.text 4387 4388 self._match_texts({"TRANSACTION", "WORK"}) 4389 4390 modes = [] 4391 while True: 4392 mode = [] 4393 while self._match(TokenType.VAR): 4394 mode.append(self._prev.text) 4395 4396 if mode: 4397 modes.append(" ".join(mode)) 4398 if not self._match(TokenType.COMMA): 4399 break 4400 4401 return 
self.expression(exp.Transaction, this=this, modes=modes) 4402 4403 def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback: 4404 chain = None 4405 savepoint = None 4406 is_rollback = self._prev.token_type == TokenType.ROLLBACK 4407 4408 self._match_texts({"TRANSACTION", "WORK"}) 4409 4410 if self._match_text_seq("TO"): 4411 self._match_text_seq("SAVEPOINT") 4412 savepoint = self._parse_id_var() 4413 4414 if self._match(TokenType.AND): 4415 chain = not self._match_text_seq("NO") 4416 self._match_text_seq("CHAIN") 4417 4418 if is_rollback: 4419 return self.expression(exp.Rollback, savepoint=savepoint) 4420 4421 return self.expression(exp.Commit, chain=chain) 4422 4423 def _parse_add_column(self) -> t.Optional[exp.Expression]: 4424 if not self._match_text_seq("ADD"): 4425 return None 4426 4427 self._match(TokenType.COLUMN) 4428 exists_column = self._parse_exists(not_=True) 4429 expression = self._parse_column_def(self._parse_field(any_token=True)) 4430 4431 if expression: 4432 expression.set("exists", exists_column) 4433 4434 # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns 4435 if self._match_texts(("FIRST", "AFTER")): 4436 position = self._prev.text 4437 column_position = self.expression( 4438 exp.ColumnPosition, this=self._parse_column(), position=position 4439 ) 4440 expression.set("position", column_position) 4441 4442 return expression 4443 4444 def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]: 4445 drop = self._match(TokenType.DROP) and self._parse_drop() 4446 if drop and not isinstance(drop, exp.Command): 4447 drop.set("kind", drop.args.get("kind", "COLUMN")) 4448 return drop 4449 4450 # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html 4451 def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition: 4452 return self.expression( 4453 exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists 4454 ) 4455 4456 def _parse_add_constraint(self) -> exp.AddConstraint: 4457 this = None 4458 kind = self._prev.token_type 4459 4460 if kind == TokenType.CONSTRAINT: 4461 this = self._parse_id_var() 4462 4463 if self._match_text_seq("CHECK"): 4464 expression = self._parse_wrapped(self._parse_conjunction) 4465 enforced = self._match_text_seq("ENFORCED") 4466 4467 return self.expression( 4468 exp.AddConstraint, this=this, expression=expression, enforced=enforced 4469 ) 4470 4471 if kind == TokenType.FOREIGN_KEY or self._match(TokenType.FOREIGN_KEY): 4472 expression = self._parse_foreign_key() 4473 elif kind == TokenType.PRIMARY_KEY or self._match(TokenType.PRIMARY_KEY): 4474 expression = self._parse_primary_key() 4475 else: 4476 expression = None 4477 4478 return self.expression(exp.AddConstraint, this=this, expression=expression) 4479 4480 def _parse_alter_table_add(self) -> t.List[t.Optional[exp.Expression]]: 4481 index = self._index - 1 4482 4483 if self._match_set(self.ADD_CONSTRAINT_TOKENS): 4484 return self._parse_csv(self._parse_add_constraint) 4485 4486 self._retreat(index) 4487 return self._parse_csv(self._parse_add_column) 4488 4489 def _parse_alter_table_alter(self) -> exp.AlterColumn: 4490 self._match(TokenType.COLUMN) 4491 column = self._parse_field(any_token=True) 4492 4493 if self._match_pair(TokenType.DROP, TokenType.DEFAULT): 4494 return self.expression(exp.AlterColumn, this=column, drop=True) 4495 if self._match_pair(TokenType.SET, TokenType.DEFAULT): 4496 return self.expression(exp.AlterColumn, this=column, 
default=self._parse_conjunction()) 4497 4498 self._match_text_seq("SET", "DATA") 4499 return self.expression( 4500 exp.AlterColumn, 4501 this=column, 4502 dtype=self._match_text_seq("TYPE") and self._parse_types(), 4503 collate=self._match(TokenType.COLLATE) and self._parse_term(), 4504 using=self._match(TokenType.USING) and self._parse_conjunction(), 4505 ) 4506 4507 def _parse_alter_table_drop(self) -> t.List[t.Optional[exp.Expression]]: 4508 index = self._index - 1 4509 4510 partition_exists = self._parse_exists() 4511 if self._match(TokenType.PARTITION, advance=False): 4512 return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists)) 4513 4514 self._retreat(index) 4515 return self._parse_csv(self._parse_drop_column) 4516 4517 def _parse_alter_table_rename(self) -> exp.RenameTable: 4518 self._match_text_seq("TO") 4519 return self.expression(exp.RenameTable, this=self._parse_table(schema=True)) 4520 4521 def _parse_alter(self) -> exp.AlterTable | exp.Command: 4522 start = self._prev 4523 4524 if not self._match(TokenType.TABLE): 4525 return self._parse_as_command(start) 4526 4527 exists = self._parse_exists() 4528 this = self._parse_table(schema=True) 4529 4530 if self._next: 4531 self._advance() 4532 4533 parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None 4534 if parser: 4535 actions = ensure_list(parser(self)) 4536 4537 if not self._curr: 4538 return self.expression( 4539 exp.AlterTable, 4540 this=this, 4541 exists=exists, 4542 actions=actions, 4543 ) 4544 return self._parse_as_command(start) 4545 4546 def _parse_merge(self) -> exp.Merge: 4547 self._match(TokenType.INTO) 4548 target = self._parse_table() 4549 4550 self._match(TokenType.USING) 4551 using = self._parse_table() 4552 4553 self._match(TokenType.ON) 4554 on = self._parse_conjunction() 4555 4556 whens = [] 4557 while self._match(TokenType.WHEN): 4558 matched = not self._match(TokenType.NOT) 4559 self._match_text_seq("MATCHED") 4560 source = ( 4561 False 4562 if self._match_text_seq("BY", "TARGET") 4563 else self._match_text_seq("BY", "SOURCE") 4564 ) 4565 condition = self._parse_conjunction() if self._match(TokenType.AND) else None 4566 4567 self._match(TokenType.THEN) 4568 4569 if self._match(TokenType.INSERT): 4570 _this = self._parse_star() 4571 if _this: 4572 then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=_this) 4573 else: 4574 then = self.expression( 4575 exp.Insert, 4576 this=self._parse_value(), 4577 expression=self._match(TokenType.VALUES) and self._parse_value(), 4578 ) 4579 elif self._match(TokenType.UPDATE): 4580 expressions = self._parse_star() 4581 if expressions: 4582 then = self.expression(exp.Update, expressions=expressions) 4583 else: 4584 then = self.expression( 4585 exp.Update, 4586 expressions=self._match(TokenType.SET) 4587 and self._parse_csv(self._parse_equality), 4588 ) 4589 elif self._match(TokenType.DELETE): 4590 then = self.expression(exp.Var, this=self._prev.text) 4591 else: 4592 then = None 4593 4594 whens.append( 4595 self.expression( 4596 exp.When, 4597 matched=matched, 4598 source=source, 4599 condition=condition, 4600 then=then, 4601 ) 4602 ) 4603 4604 return self.expression( 4605 exp.Merge, 4606 this=target, 4607 using=using, 4608 on=on, 4609 expressions=whens, 4610 ) 4611 4612 def _parse_show(self) -> t.Optional[exp.Expression]: 4613 parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE) 4614 if parser: 4615 return parser(self) 4616 self._advance() 4617 return self.expression(exp.Show, 
this=self._prev.text.upper()) 4618 4619 def _parse_set_item_assignment( 4620 self, kind: t.Optional[str] = None 4621 ) -> t.Optional[exp.Expression]: 4622 index = self._index 4623 4624 if kind in {"GLOBAL", "SESSION"} and self._match_text_seq("TRANSACTION"): 4625 return self._parse_set_transaction(global_=kind == "GLOBAL") 4626 4627 left = self._parse_primary() or self._parse_id_var() 4628 4629 if not self._match_texts(("=", "TO")): 4630 self._retreat(index) 4631 return None 4632 4633 right = self._parse_statement() or self._parse_id_var() 4634 this = self.expression(exp.EQ, this=left, expression=right) 4635 4636 return self.expression(exp.SetItem, this=this, kind=kind) 4637 4638 def _parse_set_transaction(self, global_: bool = False) -> exp.Expression: 4639 self._match_text_seq("TRANSACTION") 4640 characteristics = self._parse_csv( 4641 lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS) 4642 ) 4643 return self.expression( 4644 exp.SetItem, 4645 expressions=characteristics, 4646 kind="TRANSACTION", 4647 **{"global": global_}, # type: ignore 4648 ) 4649 4650 def _parse_set_item(self) -> t.Optional[exp.Expression]: 4651 parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE) 4652 return parser(self) if parser else self._parse_set_item_assignment(kind=None) 4653 4654 def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command: 4655 index = self._index 4656 set_ = self.expression( 4657 exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag 4658 ) 4659 4660 if self._curr: 4661 self._retreat(index) 4662 return self._parse_as_command(self._prev) 4663 4664 return set_ 4665 4666 def _parse_var_from_options(self, options: t.Collection[str]) -> t.Optional[exp.Var]: 4667 for option in options: 4668 if self._match_text_seq(*option.split(" ")): 4669 return exp.var(option) 4670 return None 4671 4672 def _parse_as_command(self, start: Token) -> exp.Command: 4673 while self._curr: 4674 self._advance() 4675 text = self._find_sql(start, self._prev) 4676 size = len(start.text) 4677 return exp.Command(this=text[:size], expression=text[size:]) 4678 4679 def _parse_dict_property(self, this: str) -> exp.DictProperty: 4680 settings = [] 4681 4682 self._match_l_paren() 4683 kind = self._parse_id_var() 4684 4685 if self._match(TokenType.L_PAREN): 4686 while True: 4687 key = self._parse_id_var() 4688 value = self._parse_primary() 4689 4690 if not key and value is None: 4691 break 4692 settings.append(self.expression(exp.DictSubProperty, this=key, value=value)) 4693 self._match(TokenType.R_PAREN) 4694 4695 self._match_r_paren() 4696 4697 return self.expression( 4698 exp.DictProperty, 4699 this=this, 4700 kind=kind.this if kind else None, 4701 settings=settings, 4702 ) 4703 4704 def _parse_dict_range(self, this: str) -> exp.DictRange: 4705 self._match_l_paren() 4706 has_min = self._match_text_seq("MIN") 4707 if has_min: 4708 min = self._parse_var() or self._parse_primary() 4709 self._match_text_seq("MAX") 4710 max = self._parse_var() or self._parse_primary() 4711 else: 4712 max = self._parse_var() or self._parse_primary() 4713 min = exp.Literal.number(0) 4714 self._match_r_paren() 4715 return self.expression(exp.DictRange, this=this, min=min, max=max) 4716 4717 def _find_parser( 4718 self, parsers: t.Dict[str, t.Callable], trie: t.Dict 4719 ) -> t.Optional[t.Callable]: 4720 if not self._curr: 4721 return None 4722 4723 index = self._index 4724 this = [] 4725 while True: 4726 # The current token might be multiple words 4727 curr = 
self._curr.text.upper() 4728 key = curr.split(" ") 4729 this.append(curr) 4730 4731 self._advance() 4732 result, trie = in_trie(trie, key) 4733 if result == TrieResult.FAILED: 4734 break 4735 4736 if result == TrieResult.EXISTS: 4737 subparser = parsers[" ".join(this)] 4738 return subparser 4739 4740 self._retreat(index) 4741 return None 4742 4743 def _match(self, token_type, advance=True, expression=None): 4744 if not self._curr: 4745 return None 4746 4747 if self._curr.token_type == token_type: 4748 if advance: 4749 self._advance() 4750 self._add_comments(expression) 4751 return True 4752 4753 return None 4754 4755 def _match_set(self, types, advance=True): 4756 if not self._curr: 4757 return None 4758 4759 if self._curr.token_type in types: 4760 if advance: 4761 self._advance() 4762 return True 4763 4764 return None 4765 4766 def _match_pair(self, token_type_a, token_type_b, advance=True): 4767 if not self._curr or not self._next: 4768 return None 4769 4770 if self._curr.token_type == token_type_a and self._next.token_type == token_type_b: 4771 if advance: 4772 self._advance(2) 4773 return True 4774 4775 return None 4776 4777 def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 4778 if not self._match(TokenType.L_PAREN, expression=expression): 4779 self.raise_error("Expecting (") 4780 4781 def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 4782 if not self._match(TokenType.R_PAREN, expression=expression): 4783 self.raise_error("Expecting )") 4784 4785 def _match_texts(self, texts, advance=True): 4786 if self._curr and self._curr.text.upper() in texts: 4787 if advance: 4788 self._advance() 4789 return True 4790 return False 4791 4792 def _match_text_seq(self, *texts, advance=True): 4793 index = self._index 4794 for text in texts: 4795 if self._curr and self._curr.text.upper() == text: 4796 self._advance() 4797 else: 4798 self._retreat(index) 4799 return False 4800 4801 if not advance: 4802 self._retreat(index) 4803 4804 return True 4805 4806 @t.overload 4807 def _replace_columns_with_dots(self, this: exp.Expression) -> exp.Expression: 4808 ... 4809 4810 @t.overload 4811 def _replace_columns_with_dots( 4812 self, this: t.Optional[exp.Expression] 4813 ) -> t.Optional[exp.Expression]: 4814 ... 4815 4816 def _replace_columns_with_dots(self, this): 4817 if isinstance(this, exp.Dot): 4818 exp.replace_children(this, self._replace_columns_with_dots) 4819 elif isinstance(this, exp.Column): 4820 exp.replace_children(this, self._replace_columns_with_dots) 4821 table = this.args.get("table") 4822 this = ( 4823 self.expression(exp.Dot, this=table, expression=this.this) if table else this.this 4824 ) 4825 4826 return this 4827 4828 def _replace_lambda( 4829 self, node: t.Optional[exp.Expression], lambda_variables: t.Set[str] 4830 ) -> t.Optional[exp.Expression]: 4831 if not node: 4832 return node 4833 4834 for column in node.find_all(exp.Column): 4835 if column.parts[0].name in lambda_variables: 4836 dot_or_id = column.to_dot() if column.table else column.this 4837 parent = column.parent 4838 4839 while isinstance(parent, exp.Dot): 4840 if not isinstance(parent.parent, exp.Dot): 4841 parent.replace(dot_or_id) 4842 break 4843 parent = parent.parent 4844 else: 4845 if column is node: 4846 node = dot_or_id 4847 else: 4848 column.replace(dot_or_id) 4849 return node
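As an illustration of the DECODE handling documented in _parse_decode above, the two-variant behavior is observable through sqlglot's public entry points. A minimal sketch (the exact SQL rendered back out may vary slightly between sqlglot versions):

import sqlglot
from sqlglot import exp

# With three or more arguments, DECODE(expression, search, result, ..., default)
# is parsed into an exp.Case tree, per the _parse_decode docstring.
ast = sqlglot.parse_one("SELECT DECODE(a, 1, 'one', 'other') FROM t")
assert ast.find(exp.Case) is not None

# Generating SQL back out shows the CASE form, e.g.:
# SELECT CASE WHEN a = 1 THEN 'one' ELSE 'other' END FROM t
print(ast.sql())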
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: Determines the amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
    def __init__(
        self,
        error_level: t.Optional[ErrorLevel] = None,
        error_message_context: int = 100,
        max_errors: int = 3,
    ):
        self.error_level = error_level or ErrorLevel.IMMEDIATE
        self.error_message_context = error_message_context
        self.max_errors = max_errors
        self.reset()
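The error_level chosen here drives the behavior of check_errors, raise_error and validate_expression below. A minimal sketch of the available modes:

from sqlglot.errors import ErrorLevel
from sqlglot.parser import Parser

# IMMEDIATE (the default) raises on the first error encountered, RAISE collects
# errors and raises them in one batch, WARN logs them, IGNORE skips validation.
strict_parser = Parser(error_level=ErrorLevel.RAISE, max_errors=5)
lenient_parser = Parser(error_level=ErrorLevel.IGNORE, error_message_context=50)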
    def parse(
        self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens and returns a list of syntax trees, one tree
        per parsed SQL statement.

        Args:
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The list of the produced syntax trees.
        """
        return self._parse(
            parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
        )
Parses a list of tokens and returns a list of syntax trees, one tree per parsed SQL statement.
Arguments:
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The list of the produced syntax trees.
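A short usage sketch pairing the Tokenizer with parse; the base (dialect-agnostic) Tokenizer and Parser are assumed here:

from sqlglot.parser import Parser
from sqlglot.tokens import Tokenizer

sql = "SELECT a FROM t; SELECT b FROM u"
tokens = Tokenizer().tokenize(sql)

# One syntax tree is produced per statement in the input.
trees = Parser().parse(tokens, sql)
print(len(trees))      # 2
print(trees[0].sql())  # SELECT a FROM t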
    def parse_into(
        self,
        expression_types: exp.IntoType,
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens into a given Expression type. If a collection of Expression
        types is given instead, this method will try to parse the token list into each one
        of them, stopping at the first for which the parsing succeeds.

        Args:
            expression_types: The expression type(s) to try and parse the token list into.
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The target Expression.
        """
        errors = []
        for expression_type in ensure_list(expression_types):
            parser = self.EXPRESSION_PARSERS.get(expression_type)
            if not parser:
                raise TypeError(f"No parser registered for {expression_type}")

            try:
                return self._parse(parser, raw_tokens, sql)
            except ParseError as e:
                e.errors[0]["into_expression"] = expression_type
                errors.append(e)

        raise ParseError(
            f"Failed to parse '{sql or raw_tokens}' into {expression_types}",
            errors=merge_errors(errors),
        ) from errors[-1]
Parses a list of tokens into a given Expression type. If a collection of Expression types is given instead, this method will try to parse the token list into each one of them, stopping at the first for which the parsing succeeds.
Arguments:
- expression_types: The expression type(s) to try and parse the token list into.
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The target Expression.
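A sketch of parsing tokens directly into a specific node type; exp.Condition is assumed to be registered in EXPRESSION_PARSERS (an unregistered type raises TypeError, as the code above shows):

from sqlglot import exp
from sqlglot.parser import Parser
from sqlglot.tokens import Tokenizer

sql = "a = 1 AND b = 2"
tokens = Tokenizer().tokenize(sql)

# Parse the token list as a boolean condition rather than a full statement.
condition = Parser().parse_into(exp.Condition, tokens, sql)[0]
print(condition.sql())  # a = 1 AND b = 2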
    def check_errors(self) -> None:
        """Logs or raises any found errors, depending on the chosen error level setting."""
        if self.error_level == ErrorLevel.WARN:
            for error in self.errors:
                logger.error(str(error))
        elif self.error_level == ErrorLevel.RAISE and self.errors:
            raise ParseError(
                concat_messages(self.errors, self.max_errors),
                errors=merge_errors(self.errors),
            )
Logs or raises any found errors, depending on the chosen error level setting.
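check_errors is invoked internally at the end of parsing, so the error level chosen at construction time determines what the caller sees. A sketch, assuming "SELECT * FROM" (a missing table name) produces a parse error:

from sqlglot.errors import ErrorLevel, ParseError
from sqlglot.parser import Parser
from sqlglot.tokens import Tokenizer

sql = "SELECT * FROM"
tokens = Tokenizer().tokenize(sql)

try:
    # Under RAISE, errors accumulate and are raised together at the end,
    # with the combined message capped at max_errors entries.
    Parser(error_level=ErrorLevel.RAISE).parse(tokens, sql)
except ParseError as e:
    print(e.errors[0]["description"])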
    def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
        """
        Appends an error to the list of recorded errors or raises it, depending on the chosen
        error level setting.
        """
        token = token or self._curr or self._prev or Token.string("")
        start = token.start
        end = token.end + 1
        start_context = self.sql[max(start - self.error_message_context, 0) : start]
        highlight = self.sql[start:end]
        end_context = self.sql[end : end + self.error_message_context]

        error = ParseError.new(
            f"{message}. Line {token.line}, Col: {token.col}.\n"
            f"  {start_context}\033[4m{highlight}\033[0m{end_context}",
            description=message,
            line=token.line,
            col=token.col,
            start_context=start_context,
            highlight=highlight,
            end_context=end_context,
        )

        if self.error_level == ErrorLevel.IMMEDIATE:
            raise error

        self.errors.append(error)
Appends an error to the list of recorded errors or raises it, depending on the chosen error level setting.
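Under the default IMMEDIATE level, the same failure surfaces as soon as raise_error is called, with the offending token underlined in its surrounding context. A sketch:

from sqlglot.errors import ParseError
from sqlglot.parser import Parser
from sqlglot.tokens import Tokenizer

sql = "SELECT * FROM"
tokens = Tokenizer().tokenize(sql)

try:
    Parser().parse(tokens, sql)  # error_level defaults to IMMEDIATE
except ParseError as e:
    # The message embeds line/column info and up to error_message_context
    # characters of SQL on either side of the highlighted token.
    print(e)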
    def expression(
        self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs
    ) -> E:
        """
        Creates a new, validated Expression.

        Args:
            exp_class: The expression class to instantiate.
            comments: An optional list of comments to attach to the expression.
            kwargs: The arguments to set for the expression along with their respective values.

        Returns:
            The target expression.
        """
        instance = exp_class(**kwargs)
        instance.add_comments(comments) if comments else self._add_comments(instance)
        return self.validate_expression(instance)
Creates a new, validated Expression.
Arguments:
- exp_class: The expression class to instantiate.
- comments: An optional list of comments to attach to the expression.
- kwargs: The arguments to set for the expression along with their respective values.
Returns:
The target expression.
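expression is primarily called from within the parser and by dialect subclasses when building nodes; a minimal sketch of direct use, relying on the exp.column and exp.to_identifier helpers:

from sqlglot import exp
from sqlglot.parser import Parser

parser = Parser()

# Build a validated node; mandatory arguments are checked via validate_expression.
alias = parser.expression(exp.Alias, this=exp.column("a"), alias=exp.to_identifier("b"))
print(alias.sql())  # a AS b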
    def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E:
        """
        Validates an Expression, making sure that all its mandatory arguments are set.

        Args:
            expression: The expression to validate.
            args: An optional list of items that was used to instantiate the expression, if it's a Func.

        Returns:
            The validated expression.
        """
        if self.error_level != ErrorLevel.IGNORE:
            for error_message in expression.error_messages(args):
                self.raise_error(error_message)

        return expression
Validates an Expression, making sure that all its mandatory arguments are set.
Arguments:
- expression: The expression to validate.
- args: An optional list of items that was used to instantiate the expression, if it's a Func.
Returns:
The validated expression.
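A sketch of validation catching a missing mandatory argument, assuming exp.Cast requires its "to" argument; under the default IMMEDIATE error level the resulting raise_error call throws right away:

from sqlglot import exp
from sqlglot.errors import ParseError
from sqlglot.parser import Parser

try:
    # No "to" data type is provided, so validation should report it as missing.
    Parser().validate_expression(exp.Cast(this=exp.column("x")))
except ParseError as e:
    print(e)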