sqlglot.parser
from __future__ import annotations

import logging
import typing as t
from collections import defaultdict

from sqlglot import exp
from sqlglot.errors import ErrorLevel, ParseError, concat_messages, merge_errors
from sqlglot.helper import apply_index_offset, ensure_list, seq_get
from sqlglot.time import format_time
from sqlglot.tokens import Token, Tokenizer, TokenType
from sqlglot.trie import in_trie, new_trie

if t.TYPE_CHECKING:
    from sqlglot._typing import E

logger = logging.getLogger("sqlglot")


def parse_var_map(args: t.List) -> exp.StarMap | exp.VarMap:
    """Build a VarMap (or StarMap for a lone `*` argument) from a flat arg list."""
    if len(args) == 1 and args[0].is_star:
        return exp.StarMap(this=args[0])

    keys = []
    values = []
    # Arguments alternate key, value, key, value, ... — assumes an even count.
    for i in range(0, len(args), 2):
        keys.append(args[i])
        values.append(args[i + 1])

    return exp.VarMap(
        keys=exp.Array(expressions=keys),
        values=exp.Array(expressions=values),
    )


def parse_like(args: t.List) -> exp.Escape | exp.Like:
    """Build a Like node; note the pattern comes first in `args`, hence the swap."""
    like = exp.Like(this=seq_get(args, 1), expression=seq_get(args, 0))
    # An optional third argument is an ESCAPE character wrapping the LIKE.
    return exp.Escape(this=like, expression=seq_get(args, 2)) if len(args) > 2 else like


def binary_range_parser(
    expr_type: t.Type[exp.Expression],
) -> t.Callable[[Parser, t.Optional[exp.Expression]], t.Optional[exp.Expression]]:
    """Return a parser for a binary range operator (LIKE, GLOB, ...) that may
    optionally be followed by an ESCAPE clause."""
    return lambda self, this: self._parse_escape(
        self.expression(expr_type, this=this, expression=self._parse_bitwise())
    )


class _Parser(type):
    """Metaclass that precomputes the SHOW/SET keyword tries on every subclass,
    so multi-word keywords can be matched efficiently at parse time."""

    def __new__(cls, clsname, bases, attrs):
        klass = super().__new__(cls, clsname, bases, attrs)

        klass.SHOW_TRIE = new_trie(key.split(" ") for key in klass.SHOW_PARSERS)
        klass.SET_TRIE = new_trie(key.split(" ") for key in klass.SET_PARSERS)

        return klass


class Parser(metaclass=_Parser):
    """
    Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.

    Args:
        error_level: The desired error level.
            Default: ErrorLevel.IMMEDIATE
        error_message_context: Determines the amount of context to capture from a
            query string when displaying the error message (in number of characters).
            Default: 100
        max_errors: Maximum number of error messages to include in a raised ParseError.
            This is only relevant if error_level is ErrorLevel.RAISE.
            Default: 3
    """

    # Maps SQL function names to builders that turn an argument list into an AST node.
    FUNCTIONS: t.Dict[str, t.Callable] = {
        **{name: f.from_arg_list for f in exp.ALL_FUNCTIONS for name in f.sql_names()},
        "DATE_TO_DATE_STR": lambda args: exp.Cast(
            this=seq_get(args, 0),
            to=exp.DataType(this=exp.DataType.Type.TEXT),
        ),
        # GLOB's pattern comes first in args, hence the swapped this/expression.
        "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)),
        "LIKE": parse_like,
        "TIME_TO_TIME_STR": lambda args: exp.Cast(
            this=seq_get(args, 0),
            to=exp.DataType(this=exp.DataType.Type.TEXT),
        ),
        "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring(
            this=exp.Cast(
                this=seq_get(args, 0),
                to=exp.DataType(this=exp.DataType.Type.TEXT),
            ),
            start=exp.Literal.number(1),
            length=exp.Literal.number(10),
        ),
        "VAR_MAP": parse_var_map,
    }

    # Functions that may appear without parentheses.
    # NOTE(review): CURRENT_DATETIME maps to exp.CurrentDate — confirm this is
    # intentional and not a typo for a CurrentDatetime expression.
    NO_PAREN_FUNCTIONS = {
        TokenType.CURRENT_DATE: exp.CurrentDate,
        TokenType.CURRENT_DATETIME: exp.CurrentDate,
        TokenType.CURRENT_TIME: exp.CurrentTime,
        TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp,
        TokenType.CURRENT_USER: exp.CurrentUser,
    }

    # Type tokens that can wrap other types (e.g. ARRAY<INT>).
    NESTED_TYPE_TOKENS = {
        TokenType.ARRAY,
        TokenType.MAP,
        TokenType.NULLABLE,
        TokenType.STRUCT,
    }

    # All tokens that can start a data type.
    TYPE_TOKENS = {
        TokenType.BIT,
        TokenType.BOOLEAN,
        TokenType.TINYINT,
        TokenType.UTINYINT,
        TokenType.SMALLINT,
        TokenType.USMALLINT,
        TokenType.INT,
        TokenType.UINT,
        TokenType.BIGINT,
        TokenType.UBIGINT,
        TokenType.INT128,
        TokenType.UINT128,
        TokenType.INT256,
        TokenType.UINT256,
        TokenType.FLOAT,
        TokenType.DOUBLE,
        TokenType.CHAR,
        TokenType.NCHAR,
        TokenType.VARCHAR,
        TokenType.NVARCHAR,
        TokenType.TEXT,
        TokenType.MEDIUMTEXT,
        TokenType.LONGTEXT,
        TokenType.MEDIUMBLOB,
        TokenType.LONGBLOB,
        TokenType.BINARY,
        TokenType.VARBINARY,
        TokenType.JSON,
        TokenType.JSONB,
        TokenType.INTERVAL,
        TokenType.TIME,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
        TokenType.DATETIME,
        TokenType.DATETIME64,
        TokenType.DATE,
        TokenType.INT4RANGE,
        TokenType.INT4MULTIRANGE,
        TokenType.INT8RANGE,
        TokenType.INT8MULTIRANGE,
        TokenType.NUMRANGE,
        TokenType.NUMMULTIRANGE,
        TokenType.TSRANGE,
        TokenType.TSMULTIRANGE,
        TokenType.TSTZRANGE,
        TokenType.TSTZMULTIRANGE,
        TokenType.DATERANGE,
        TokenType.DATEMULTIRANGE,
        TokenType.DECIMAL,
        TokenType.BIGDECIMAL,
        TokenType.UUID,
        TokenType.GEOGRAPHY,
        TokenType.GEOMETRY,
        TokenType.HLLSKETCH,
        TokenType.HSTORE,
        TokenType.PSEUDO_TYPE,
        TokenType.SUPER,
        TokenType.SERIAL,
        TokenType.SMALLSERIAL,
        TokenType.BIGSERIAL,
        TokenType.XML,
        TokenType.UNIQUEIDENTIFIER,
        TokenType.MONEY,
        TokenType.SMALLMONEY,
        TokenType.ROWVERSION,
        TokenType.IMAGE,
        TokenType.VARIANT,
        TokenType.OBJECT,
        TokenType.INET,
        *NESTED_TYPE_TOKENS,
    }

    # Tokens that introduce a subquery predicate (e.g. ANY (SELECT ...)).
    SUBQUERY_PREDICATES = {
        TokenType.ANY: exp.Any,
        TokenType.ALL: exp.All,
        TokenType.EXISTS: exp.Exists,
        TokenType.SOME: exp.Any,  # SOME maps to the same node as ANY
    }

    RESERVED_KEYWORDS = {
        *Tokenizer.SINGLE_TOKENS.values(),
        TokenType.SELECT,
    }

    # Object kinds creatable at the database level.
    DB_CREATABLES = {
        TokenType.DATABASE,
        TokenType.SCHEMA,
        TokenType.TABLE,
        TokenType.VIEW,
        TokenType.DICTIONARY,
    }

    # All object kinds usable with CREATE-like statements.
    CREATABLES = {
        TokenType.COLUMN,
        TokenType.FUNCTION,
        TokenType.INDEX,
        TokenType.PROCEDURE,
        *DB_CREATABLES,
    }

    # Tokens that can represent identifiers
    ID_VAR_TOKENS = {
        TokenType.VAR,
        TokenType.ANTI,
        TokenType.APPLY,
        TokenType.ASC,
        TokenType.AUTO_INCREMENT,
        TokenType.BEGIN,
        TokenType.CACHE,
        TokenType.COLLATE,
        TokenType.COMMAND,
        TokenType.COMMENT,
        TokenType.COMMIT,
        TokenType.CONSTRAINT,
        TokenType.DEFAULT,
        TokenType.DELETE,
        TokenType.DESC,
        TokenType.DESCRIBE,
        TokenType.DICTIONARY,
        TokenType.DIV,
        TokenType.END,
        TokenType.EXECUTE,
        TokenType.ESCAPE,
        TokenType.FALSE,
        TokenType.FIRST,
        TokenType.FILTER,
        TokenType.FORMAT,
        TokenType.FULL,
        TokenType.IF,
        TokenType.IS,
        TokenType.ISNULL,
        TokenType.INTERVAL,
        TokenType.KEEP,
        TokenType.LEFT,
        TokenType.LOAD,
        TokenType.MERGE,
        TokenType.NATURAL,
        TokenType.NEXT,
        TokenType.OFFSET,
        TokenType.ORDINALITY,
        TokenType.OVERWRITE,
        TokenType.PARTITION,
        TokenType.PERCENT,
        TokenType.PIVOT,
        TokenType.PRAGMA,
        TokenType.RANGE,
        TokenType.REFERENCES,
        TokenType.RIGHT,
        TokenType.ROW,
        TokenType.ROWS,
        TokenType.SEMI,
        TokenType.SET,
        TokenType.SETTINGS,
        TokenType.SHOW,
        TokenType.TEMPORARY,
        TokenType.TOP,
        TokenType.TRUE,
        TokenType.UNIQUE,
        TokenType.UNPIVOT,
        TokenType.VOLATILE,
        TokenType.WINDOW,
        *CREATABLES,
        *SUBQUERY_PREDICATES,
        *TYPE_TOKENS,
        *NO_PAREN_FUNCTIONS,
    }

    # Identifier tokens valid inside an INTERVAL expression (END excluded).
    INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END}

    # Identifier tokens that may serve as a table alias; tokens that can start a
    # clause following a table reference are excluded.
    TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - {
        TokenType.APPLY,
        TokenType.ASOF,
        TokenType.FULL,
        TokenType.LEFT,
        TokenType.LOCK,
        TokenType.NATURAL,
        TokenType.OFFSET,
        TokenType.RIGHT,
        TokenType.WINDOW,
    }

    COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS}

    UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET}

    TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"}

    # Tokens that may be followed by a parenthesized argument list.
    FUNC_TOKENS = {
        TokenType.COMMAND,
        TokenType.CURRENT_DATE,
        TokenType.CURRENT_DATETIME,
        TokenType.CURRENT_TIMESTAMP,
        TokenType.CURRENT_TIME,
        TokenType.CURRENT_USER,
        TokenType.FILTER,
        TokenType.FIRST,
        TokenType.FORMAT,
        TokenType.GLOB,
        TokenType.IDENTIFIER,
        TokenType.INDEX,
        TokenType.ISNULL,
        TokenType.ILIKE,
        TokenType.LIKE,
        TokenType.MERGE,
        TokenType.OFFSET,
        TokenType.PRIMARY_KEY,
        TokenType.RANGE,
        TokenType.REPLACE,
        TokenType.ROW,
        TokenType.UNNEST,
        TokenType.VAR,
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.DATE,
        TokenType.DATETIME,
        TokenType.TABLE,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.WINDOW,
        *TYPE_TOKENS,
        *SUBQUERY_PREDICATES,
    }

    # Binary operator tables, one per precedence level.
    CONJUNCTION = {
        TokenType.AND: exp.And,
        TokenType.OR: exp.Or,
    }

    EQUALITY = {
        TokenType.EQ: exp.EQ,
        TokenType.NEQ: exp.NEQ,
        TokenType.NULLSAFE_EQ: exp.NullSafeEQ,
    }

    COMPARISON = {
        TokenType.GT: exp.GT,
        TokenType.GTE: exp.GTE,
        TokenType.LT: exp.LT,
        TokenType.LTE: exp.LTE,
    }

    BITWISE = {
        TokenType.AMP: exp.BitwiseAnd,
        TokenType.CARET: exp.BitwiseXor,
        TokenType.PIPE: exp.BitwiseOr,
        TokenType.DPIPE: exp.DPipe,
    }

    TERM = {
        TokenType.DASH: exp.Sub,
        TokenType.PLUS: exp.Add,
        TokenType.MOD: exp.Mod,
        TokenType.COLLATE: exp.Collate,
    }

    FACTOR = {
        TokenType.DIV: exp.IntDiv,
        TokenType.LR_ARROW: exp.Distance,
        TokenType.SLASH: exp.Div,
        TokenType.STAR: exp.Mul,
    }

    TIMESTAMPS = {
        TokenType.TIME,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
    }

    SET_OPERATIONS = {
        TokenType.UNION,
        TokenType.INTERSECT,
        TokenType.EXCEPT,
    }

    JOIN_METHODS = {
        TokenType.NATURAL,
        TokenType.ASOF,
    }

    JOIN_SIDES = {
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.FULL,
    }

    JOIN_KINDS = {
        TokenType.INNER,
        TokenType.OUTER,
        TokenType.CROSS,
        TokenType.SEMI,
        TokenType.ANTI,
    }

    # Empty by default; dialects can register textual join hints here.
    JOIN_HINTS: t.Set[str] = set()

    # Lambda syntaxes: `x -> expr` builds exp.Lambda, `name => expr` a keyword arg.
    LAMBDAS = {
        TokenType.ARROW: lambda self, expressions: self.expression(
            exp.Lambda,
            this=self._replace_lambda(
                self._parse_conjunction(),
                {node.name for node in expressions},
            ),
            expressions=expressions,
        ),
        TokenType.FARROW: lambda self, expressions: self.expression(
            exp.Kwarg,
            this=exp.var(expressions[0].name),
            expression=self._parse_conjunction(),
        ),
    }

    # Operators that can directly follow a column (casting, JSON extraction, ...).
    COLUMN_OPERATORS = {
        TokenType.DOT: None,  # no callback — presumably handled inline; verify in _parse_column
        TokenType.DCOLON: lambda self, this, to: self.expression(
            exp.Cast if self.STRICT_CAST else exp.TryCast,
            this=this,
            to=to,
        ),
        TokenType.ARROW: lambda self, this, path: self.expression(
            exp.JSONExtract,
            this=this,
            expression=path,
        ),
        TokenType.DARROW: lambda self, this, path: self.expression(
            exp.JSONExtractScalar,
            this=this,
            expression=path,
        ),
        TokenType.HASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtract,
            this=this,
            expression=path,
        ),
        TokenType.DHASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtractScalar,
            this=this,
            expression=path,
        ),
        TokenType.PLACEHOLDER: lambda self, this, key: self.expression(
            exp.JSONBContains,
            this=this,
            expression=key,
        ),
    }

    # Entry points used by parse_into: maps a target expression type to its parser.
    EXPRESSION_PARSERS = {
        exp.Cluster: lambda self: self._parse_sort(exp.Cluster, "CLUSTER", "BY"),
        exp.Column: lambda self: self._parse_column(),
        exp.Condition: lambda self: self._parse_conjunction(),
        exp.DataType: lambda self: self._parse_types(),
        exp.Expression: lambda self: self._parse_statement(),
        exp.From: lambda self: self._parse_from(),
        exp.Group: lambda self: self._parse_group(),
        exp.Having: lambda self: self._parse_having(),
        exp.Identifier: lambda self: self._parse_id_var(),
        exp.Join: lambda self: self._parse_join(),
        exp.Lambda: lambda self: self._parse_lambda(),
        exp.Lateral: lambda self: self._parse_lateral(),
        exp.Limit: lambda self: self._parse_limit(),
        exp.Offset: lambda self: self._parse_offset(),
        exp.Order: lambda self: self._parse_order(),
        exp.Ordered: lambda self: self._parse_ordered(),
        exp.Properties: lambda self: self._parse_properties(),
        exp.Qualify: lambda self: self._parse_qualify(),
        exp.Returning: lambda self: self._parse_returning(),
        exp.Sort: lambda self: self._parse_sort(exp.Sort, "SORT", "BY"),
        exp.Table: lambda self: self._parse_table_parts(),
        exp.TableAlias: lambda self: self._parse_table_alias(),
        exp.Where: lambda self: self._parse_where(),
        exp.Window: lambda self: self._parse_named_window(),
        exp.With: lambda self: self._parse_with(),
        "JOIN_TYPE": lambda self: self._parse_join_parts(),
    }

    # Maps the first token of a statement to its dedicated parser.
    STATEMENT_PARSERS = {
        TokenType.ALTER: lambda self: self._parse_alter(),
        TokenType.BEGIN: lambda self: self._parse_transaction(),
        TokenType.CACHE: lambda self: self._parse_cache(),
        TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(),
        TokenType.COMMENT: lambda self: self._parse_comment(),
        TokenType.CREATE: lambda self: self._parse_create(),
        TokenType.DELETE: lambda self: self._parse_delete(),
        TokenType.DESC: lambda self: self._parse_describe(),
        TokenType.DESCRIBE: lambda self: self._parse_describe(),
        TokenType.DROP: lambda self: self._parse_drop(),
        TokenType.END: lambda self: self._parse_commit_or_rollback(),
        # A bare FROM is treated as SELECT * FROM ...
        TokenType.FROM: lambda self: exp.select("*").from_(
            t.cast(exp.From, self._parse_from(skip_from_token=True))
        ),
        TokenType.INSERT: lambda self: self._parse_insert(),
        TokenType.LOAD: lambda self: self._parse_load(),
        TokenType.MERGE: lambda self: self._parse_merge(),
        TokenType.PIVOT: lambda self: self._parse_simplified_pivot(),
        TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()),
        TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(),
        TokenType.SET: lambda self: self._parse_set(),
        TokenType.UNCACHE: lambda self: self._parse_uncache(),
        TokenType.UPDATE: lambda self: self._parse_update(),
        TokenType.USE: lambda self: self.expression(
            exp.Use,
            kind=self._match_texts(("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA"))
            and exp.var(self._prev.text),
            this=self._parse_table(schema=False),
        ),
    }

    UNARY_PARSERS = {
        TokenType.PLUS: lambda self: self._parse_unary(),  # Unary + is handled as a no-op
        TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()),
        TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()),
        TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()),
    }

    # Parsers for literal / primary expressions, keyed by token type.
    PRIMARY_PARSERS = {
        TokenType.STRING: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=True
        ),
        TokenType.NUMBER: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=False
        ),
        # "except" is a Python keyword, hence the dict-splat keyword arguments.
        TokenType.STAR: lambda self, _: self.expression(
            exp.Star,
            **{"except": self._parse_except(), "replace": self._parse_replace()},
        ),
        TokenType.NULL: lambda self, _: self.expression(exp.Null),
        TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True),
        TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False),
        TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text),
        TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text),
        TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text),
        TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token),
        TokenType.NATIONAL_STRING: lambda self, token: self.expression(
            exp.National, this=token.text
        ),
        TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text),
        TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(),
    }

    PLACEHOLDER_PARSERS = {
        TokenType.PLACEHOLDER: lambda self:
        self.expression(exp.Placeholder),
        TokenType.PARAMETER: lambda self: self._parse_parameter(),
        # `:1` / `:name` style placeholders; None when the colon is not followed
        # by a number or identifier.
        TokenType.COLON: lambda self: self.expression(exp.Placeholder, this=self._prev.text)
        if self._match_set((TokenType.NUMBER, TokenType.VAR))
        else None,
    }

    # Postfix "range" operators (BETWEEN, IN, LIKE, ...), keyed by token.
    RANGE_PARSERS = {
        TokenType.BETWEEN: lambda self, this: self._parse_between(this),
        TokenType.GLOB: binary_range_parser(exp.Glob),
        TokenType.ILIKE: binary_range_parser(exp.ILike),
        TokenType.IN: lambda self, this: self._parse_in(this),
        TokenType.IRLIKE: binary_range_parser(exp.RegexpILike),
        TokenType.IS: lambda self, this: self._parse_is(this),
        TokenType.LIKE: binary_range_parser(exp.Like),
        TokenType.OVERLAPS: binary_range_parser(exp.Overlaps),
        TokenType.RLIKE: binary_range_parser(exp.RegexpLike),
        TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo),
    }

    # DDL property parsers, keyed by (possibly multi-word) keyword text.
    PROPERTY_PARSERS: t.Dict[str, t.Callable] = {
        "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty),
        "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty),
        "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(),
        "CHARACTER SET": lambda self: self._parse_character_set(),
        "CHECKSUM": lambda self: self._parse_checksum(),
        "CLUSTER": lambda self: self._parse_cluster(),
        "COLLATE": lambda self: self._parse_property_assignment(exp.CollateProperty),
        "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty),
        "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs),
        "DEFINER": lambda self: self._parse_definer(),
        # DETERMINISTIC is normalized to the IMMUTABLE stability level.
        "DETERMINISTIC": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "DISTKEY": lambda self: self._parse_distkey(),
        "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty),
        "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty),
        "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty),
        "EXTERNAL": lambda self: self.expression(exp.ExternalProperty),
        "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs),
        "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "FREESPACE": lambda self: self._parse_freespace(),
        "IMMUTABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs),
        "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty),
        "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"),
        "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"),
        "LIKE": lambda self: self._parse_create_like(),
        "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty),
        "LOCK": lambda self: self._parse_locking(),
        "LOCKING": lambda self: self._parse_locking(),
        "LOG": lambda self, **kwargs: self._parse_log(**kwargs),
        "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty),
        "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs),
        "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True),
        "NO": lambda self: self._parse_no_property(),
        "ON": lambda self: self._parse_on_property(),
        "ORDER BY": lambda self: self._parse_order(skip_order_token=True),
        "PARTITION BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED_BY": lambda self: self._parse_partitioned_by(),
        "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True),
        "RANGE": lambda self: self._parse_dict_range(this="RANGE"),
        "RETURNS": lambda self: self._parse_returns(),
        "ROW": lambda self: self._parse_row(),
        "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty),
        "SET": lambda self: self.expression(exp.SetProperty, multi=False),
        "SETTINGS": lambda self: self.expression(
            exp.SettingsProperty, expressions=self._parse_csv(self._parse_set_item)
        ),
        "SORTKEY": lambda self: self._parse_sortkey(),
        "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"),
        "STABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("STABLE")
        ),
        "STORED": lambda self: self._parse_stored(),
        "TBLPROPERTIES": lambda self: self._parse_wrapped_csv(self._parse_property),
        "TEMP": lambda self: self.expression(exp.TemporaryProperty),
        "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty),
        "TO": lambda self: self._parse_to_table(),
        "TRANSIENT": lambda self: self.expression(exp.TransientProperty),
        "TTL": lambda self: self._parse_ttl(),
        "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "VOLATILE": lambda self: self._parse_volatile_property(),
        "WITH": lambda self: self._parse_with_property(),
    }

    # Column constraint parsers, keyed by keyword text.
    CONSTRAINT_PARSERS = {
        "AUTOINCREMENT": lambda self: self._parse_auto_increment(),
        "AUTO_INCREMENT": lambda self: self._parse_auto_increment(),
        "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False),
        "CHARACTER SET": lambda self: self.expression(
            exp.CharacterSetColumnConstraint, this=self._parse_var_or_string()
        ),
        "CHECK": lambda self: self.expression(
            exp.CheckColumnConstraint, this=self._parse_wrapped(self._parse_conjunction)
        ),
        "COLLATE": lambda self: self.expression(
            exp.CollateColumnConstraint, this=self._parse_var()
        ),
        "COMMENT": lambda self: self.expression(
            exp.CommentColumnConstraint, this=self._parse_string()
        ),
        "COMPRESS": lambda self: self._parse_compress(),
        "DEFAULT": lambda self: self.expression(
            exp.DefaultColumnConstraint, this=self._parse_bitwise()
        ),
        "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()),
        "FOREIGN KEY": lambda self: self._parse_foreign_key(),
        "FORMAT": lambda self: self.expression(
            exp.DateFormatColumnConstraint, this=self._parse_var_or_string()
        ),
        "GENERATED": lambda self: self._parse_generated_as_identity(),
        "IDENTITY": lambda self: self._parse_auto_increment(),
        "INLINE": lambda self: self._parse_inline(),
        "LIKE": lambda self: self._parse_create_like(),
        "NOT": lambda self: self._parse_not_constraint(),
        "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True),
        # ON only forms a constraint when followed by UPDATE (ON UPDATE ...).
        "ON": lambda self: self._match(TokenType.UPDATE)
        and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function()),
        "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()),
        "PRIMARY KEY": lambda self: self._parse_primary_key(),
        "REFERENCES": lambda self: self._parse_references(match=False),
        "TITLE": lambda self: self.expression(
            exp.TitleColumnConstraint, this=self._parse_var_or_string()
        ),
        "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]),
        "UNIQUE": lambda self: self._parse_unique(),
        "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint),
    }

    # ALTER TABLE action parsers.
    ALTER_PARSERS = {
        "ADD": lambda self: self._parse_alter_table_add(),
        "ALTER": lambda self: self._parse_alter_table_alter(),
        "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()),
        "DROP": lambda self: self._parse_alter_table_drop(),
        "RENAME": lambda self: self._parse_alter_table_rename(),
    }

    # Constraints that may appear in a schema without a preceding column name.
    SCHEMA_UNNAMED_CONSTRAINTS = {"CHECK", "FOREIGN KEY", "LIKE", "PRIMARY KEY", "UNIQUE"}

    # Function-like constructs parsed without parentheses around their arguments.
    NO_PAREN_FUNCTION_PARSERS = {
        TokenType.ANY: lambda self: self.expression(exp.Any, this=self._parse_bitwise()),
        TokenType.CASE: lambda self: self._parse_case(),
        TokenType.IF: lambda self: self._parse_if(),
        TokenType.NEXT_VALUE_FOR: lambda self: self.expression(
            exp.NextValueFor,
            this=self._parse_column(),
            order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order),
        ),
    }

    FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"}

    # Functions whose argument lists need a dedicated parsing routine.
    FUNCTION_PARSERS: t.Dict[str, t.Callable] = {
        "CAST": lambda self: self._parse_cast(self.STRICT_CAST),
        "CONCAT": lambda self: self._parse_concat(),
        "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST),
        "DECODE": lambda self: self._parse_decode(),
        "EXTRACT": lambda self: self._parse_extract(),
        "JSON_OBJECT": lambda self: self._parse_json_object(),
        "LOG": lambda self: self._parse_logarithm(),
        "MATCH": lambda self: self._parse_match_against(),
        "OPENJSON": lambda self: self._parse_open_json(),
        "POSITION": lambda self: self._parse_position(),
        "SAFE_CAST": lambda self: self._parse_cast(False),
        "STRING_AGG": lambda self: self._parse_string_agg(),
        "SUBSTRING": lambda self: self._parse_substring(),
        "TRIM": lambda self: self._parse_trim(),
        "TRY_CAST": lambda self: self._parse_cast(False),
        "TRY_CONVERT": lambda self: self._parse_convert(False),
    }

    # Clause parsers applied after a query body; iter(parser, None) collects
    # repeated clauses (joins/laterals) until the parser returns None.
    QUERY_MODIFIER_PARSERS = {
        "joins": lambda self: list(iter(self._parse_join, None)),
        "laterals": lambda self: list(iter(self._parse_lateral, None)),
        "match": lambda self: self._parse_match_recognize(),
        "where": lambda self: self._parse_where(),
        "group": lambda self: self._parse_group(),
        "having": lambda self: self._parse_having(),
        "qualify": lambda self: self._parse_qualify(),
        "windows": lambda self: self._parse_window_clause(),
        "order": lambda self: self._parse_order(),
        "limit": lambda self: self._parse_limit(),
        "offset": lambda self: self._parse_offset(),
        "locks": lambda self: self._parse_locks(),
        "sample": lambda self: self._parse_table_sample(as_modifier=True),
    }

    SET_PARSERS = {
        "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"),
        "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"),
        "SESSION": lambda self: self._parse_set_item_assignment("SESSION"),
        "TRANSACTION": lambda self: self._parse_set_transaction(),
    }

    # Empty by default; dialects can register SHOW statement parsers here.
    SHOW_PARSERS: t.Dict[str, t.Callable] = {}

    TYPE_LITERAL_PARSERS: t.Dict[exp.DataType.Type, t.Callable] = {}

    MODIFIABLES = (exp.Subquery, exp.Subqueryable, exp.Table)

    DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN}

    PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE}

    TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"}
    TRANSACTION_CHARACTERISTICS = {
        "ISOLATION LEVEL REPEATABLE READ",
        "ISOLATION LEVEL READ COMMITTED",
        "ISOLATION LEVEL READ UNCOMMITTED",
        "ISOLATION LEVEL SERIALIZABLE",
        "READ WRITE",
        "READ ONLY",
    }

    INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"}

    CLONE_KINDS = {"TIMESTAMP", "OFFSET", "STATEMENT"}

    WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS}
    WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER}
    WINDOW_SIDES = {"FOLLOWING", "PRECEDING"}

    ADD_CONSTRAINT_TOKENS = {TokenType.CONSTRAINT, TokenType.PRIMARY_KEY, TokenType.FOREIGN_KEY}

    # Dialect behavior flags (overridden by subclasses).
    STRICT_CAST = True

    CONCAT_NULL_OUTPUTS_STRING = False  # A NULL arg in CONCAT yields NULL by default

    CONVERT_TYPE_FIRST = False

    PREFIXED_PIVOT_COLUMNS = False
    IDENTIFY_PIVOT_STRINGS = False

    LOG_BASE_FIRST = True
    LOG_DEFAULTS_TO_LN = False

    __slots__ = (
        "error_level",
        "error_message_context",
        "max_errors",
        "sql",
        "errors",
        "_tokens",
        "_index",
        "_curr",
        "_next",
        "_prev",
        "_prev_comments",
    )

    # Autofilled
    INDEX_OFFSET: int = 0
    UNNEST_COLUMN_ONLY: bool = False
    ALIAS_POST_TABLESAMPLE: bool = False
    STRICT_STRING_CONCAT = False
    NULL_ORDERING: str =
    "nulls_are_small"
    SHOW_TRIE: t.Dict = {}
    SET_TRIE: t.Dict = {}
    FORMAT_MAPPING: t.Dict[str, str] = {}
    FORMAT_TRIE: t.Dict = {}
    TIME_MAPPING: t.Dict[str, str] = {}
    TIME_TRIE: t.Dict = {}

    def __init__(
        self,
        error_level: t.Optional[ErrorLevel] = None,
        error_message_context: int = 100,
        max_errors: int = 3,
    ):
        self.error_level = error_level or ErrorLevel.IMMEDIATE
        self.error_message_context = error_message_context
        self.max_errors = max_errors
        self.reset()

    def reset(self):
        # Clears all parsing state so the instance can be reused across parses.
        self.sql = ""
        self.errors = []
        self._tokens = []
        self._index = 0
        self._curr = None
        self._next = None
        self._prev = None
        self._prev_comments = None

    def parse(
        self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens and returns a list of syntax trees, one tree
        per parsed SQL statement.

        Args:
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The list of the produced syntax trees.
        """
        return self._parse(
            parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
        )

    def parse_into(
        self,
        expression_types: exp.IntoType,
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens into a given Expression type. If a collection of Expression
        types is given instead, this method will try to parse the token list into each one
        of them, stopping at the first for which the parsing succeeds.

        Args:
            expression_types: The expression type(s) to try and parse the token list into.
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The target Expression.
        """
        errors = []
        for expression_type in ensure_list(expression_types):
            parser = self.EXPRESSION_PARSERS.get(expression_type)
            if not parser:
                raise TypeError(f"No parser registered for {expression_type}")

            try:
                return self._parse(parser, raw_tokens, sql)
            except ParseError as e:
                # Record which target type this failure belongs to, so the
                # aggregate error raised below is actionable.
                e.errors[0]["into_expression"] = expression_type
                errors.append(e)

        raise ParseError(
            f"Failed to parse '{sql or raw_tokens}' into {expression_types}",
            errors=merge_errors(errors),
        ) from errors[-1]

    def _parse(
        self,
        parse_method: t.Callable[[Parser], t.Optional[exp.Expression]],
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        # Core driver: splits the token stream into statements and runs
        # `parse_method` once per statement.
        self.reset()
        self.sql = sql or ""

        total = len(raw_tokens)
        chunks: t.List[t.List[Token]] = [[]]

        # One chunk per statement; semicolons are dropped, and a trailing
        # semicolon does not open an empty chunk.
        for i, token in enumerate(raw_tokens):
            if token.token_type == TokenType.SEMICOLON:
                if i < total - 1:
                    chunks.append([])
            else:
                chunks[-1].append(token)

        expressions = []

        for tokens in chunks:
            self._index = -1
            self._tokens = tokens
            self._advance()

            expressions.append(parse_method(self))

            # Leftover tokens mean the statement was not fully consumed.
            if self._index < len(self._tokens):
                self.raise_error("Invalid expression / Unexpected token")

            self.check_errors()

        return expressions

    def check_errors(self) -> None:
        """Logs or raises any found errors, depending on the chosen error level setting."""
        if self.error_level == ErrorLevel.WARN:
            for error in self.errors:
                logger.error(str(error))
        elif self.error_level == ErrorLevel.RAISE and self.errors:
            raise ParseError(
                concat_messages(self.errors, self.max_errors),
                errors=merge_errors(self.errors),
            )

    def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
        """
        Appends an error in the list of recorded errors or raises it, depending on the chosen
        error level setting.
        """
        token = token or self._curr or self._prev or Token.string("")
        start = token.start
        end = token.end + 1
        start_context = self.sql[max(start - self.error_message_context, 0) : start]
        highlight = self.sql[start:end]
        end_context = self.sql[end : end + self.error_message_context]

        # The offending span is underlined via ANSI escape sequences.
        error = ParseError.new(
            f"{message}. Line {token.line}, Col: {token.col}.\n"
            f"  {start_context}\033[4m{highlight}\033[0m{end_context}",
            description=message,
            line=token.line,
            col=token.col,
            start_context=start_context,
            highlight=highlight,
            end_context=end_context,
        )

        if self.error_level == ErrorLevel.IMMEDIATE:
            raise error

        self.errors.append(error)

    def expression(
        self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs
    ) -> E:
        """
        Creates a new, validated Expression.

        Args:
            exp_class: The expression class to instantiate.
            comments: An optional list of comments to attach to the expression.
            kwargs: The arguments to set for the expression along with their respective values.

        Returns:
            The target expression.
        """
        instance = exp_class(**kwargs)
        # Explicit comments win; otherwise attach any comments buffered from
        # the previously consumed token.
        instance.add_comments(comments) if comments else self._add_comments(instance)
        return self.validate_expression(instance)

    def _add_comments(self, expression: t.Optional[exp.Expression]) -> None:
        # Moves the buffered comments of the previous token onto `expression`.
        if expression and self._prev_comments:
            expression.add_comments(self._prev_comments)
            self._prev_comments = None

    def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E:
        """
        Validates an Expression, making sure that all its mandatory arguments are set.

        Args:
            expression: The expression to validate.
            args: An optional list of items that was used to instantiate the expression, if it's a Func.

        Returns:
            The validated expression.
        """
        if self.error_level != ErrorLevel.IGNORE:
            for error_message in expression.error_messages(args):
                self.raise_error(error_message)

        return expression

    def _find_sql(self, start: Token, end: Token) -> str:
        # Returns the slice of the original SQL text covered by the two tokens.
        return self.sql[start.start : end.end + 1]

    def _advance(self, times: int = 1) -> None:
        # Moves the cursor (backward for negative `times`) and refreshes the
        # _curr/_next/_prev convenience pointers.
        self._index += times
        self._curr = seq_get(self._tokens, self._index)
        self._next = seq_get(self._tokens, self._index + 1)

        if self._index > 0:
            self._prev = self._tokens[self._index - 1]
            self._prev_comments = self._prev.comments
        else:
            self._prev = None
            self._prev_comments = None

    def _retreat(self, index: int) -> None:
        # Rewinds (or advances) the cursor to an absolute token index.
        if index != self._index:
            self._advance(index - self._index)

    def _parse_command(self) -> exp.Command:
        # Fallback: wrap an otherwise unparsed statement as an opaque Command.
        return self.expression(exp.Command, this=self._prev.text, expression=self._parse_string())

    def _parse_comment(self, allow_exists: bool = True) -> exp.Expression:
        # Parses COMMENT [IF EXISTS] ON <kind> <name> IS <string>.
        start = self._prev
        exists = self._parse_exists() if allow_exists else None

        self._match(TokenType.ON)

        kind = self._match_set(self.CREATABLES) and self._prev
        if not kind:
            # Unknown target kind: preserve the statement as an opaque command.
            return self._parse_as_command(start)

        if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=kind.token_type)
        elif kind.token_type == TokenType.TABLE:
            this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS)
        elif kind.token_type == TokenType.COLUMN:
            this = self._parse_column()
        else:
            this = self._parse_id_var()

        self._match(TokenType.IS)

        return self.expression(
            exp.Comment, this=this, kind=kind.text, expression=self._parse_string(), exists=exists
        )

    def _parse_to_table(
        self,
    ) -> exp.ToTableProperty:
        table = self._parse_table_parts(schema=True)
        return self.expression(exp.ToTableProperty,
this=table) 1062 1063 # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl 1064 def _parse_ttl(self) -> exp.Expression: 1065 def _parse_ttl_action() -> t.Optional[exp.Expression]: 1066 this = self._parse_bitwise() 1067 1068 if self._match_text_seq("DELETE"): 1069 return self.expression(exp.MergeTreeTTLAction, this=this, delete=True) 1070 if self._match_text_seq("RECOMPRESS"): 1071 return self.expression( 1072 exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise() 1073 ) 1074 if self._match_text_seq("TO", "DISK"): 1075 return self.expression( 1076 exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string() 1077 ) 1078 if self._match_text_seq("TO", "VOLUME"): 1079 return self.expression( 1080 exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string() 1081 ) 1082 1083 return this 1084 1085 expressions = self._parse_csv(_parse_ttl_action) 1086 where = self._parse_where() 1087 group = self._parse_group() 1088 1089 aggregates = None 1090 if group and self._match(TokenType.SET): 1091 aggregates = self._parse_csv(self._parse_set_item) 1092 1093 return self.expression( 1094 exp.MergeTreeTTL, 1095 expressions=expressions, 1096 where=where, 1097 group=group, 1098 aggregates=aggregates, 1099 ) 1100 1101 def _parse_statement(self) -> t.Optional[exp.Expression]: 1102 if self._curr is None: 1103 return None 1104 1105 if self._match_set(self.STATEMENT_PARSERS): 1106 return self.STATEMENT_PARSERS[self._prev.token_type](self) 1107 1108 if self._match_set(Tokenizer.COMMANDS): 1109 return self._parse_command() 1110 1111 expression = self._parse_expression() 1112 expression = self._parse_set_operations(expression) if expression else self._parse_select() 1113 return self._parse_query_modifiers(expression) 1114 1115 def _parse_drop(self) -> exp.Drop | exp.Command: 1116 start = self._prev 1117 temporary = self._match(TokenType.TEMPORARY) 1118 materialized = self._match_text_seq("MATERIALIZED") 1119 1120 kind = 
self._match_set(self.CREATABLES) and self._prev.text 1121 if not kind: 1122 return self._parse_as_command(start) 1123 1124 return self.expression( 1125 exp.Drop, 1126 exists=self._parse_exists(), 1127 this=self._parse_table(schema=True), 1128 kind=kind, 1129 temporary=temporary, 1130 materialized=materialized, 1131 cascade=self._match_text_seq("CASCADE"), 1132 constraints=self._match_text_seq("CONSTRAINTS"), 1133 purge=self._match_text_seq("PURGE"), 1134 ) 1135 1136 def _parse_exists(self, not_: bool = False) -> t.Optional[bool]: 1137 return ( 1138 self._match(TokenType.IF) 1139 and (not not_ or self._match(TokenType.NOT)) 1140 and self._match(TokenType.EXISTS) 1141 ) 1142 1143 def _parse_create(self) -> exp.Create | exp.Command: 1144 # Note: this can't be None because we've matched a statement parser 1145 start = self._prev 1146 replace = start.text.upper() == "REPLACE" or self._match_pair( 1147 TokenType.OR, TokenType.REPLACE 1148 ) 1149 unique = self._match(TokenType.UNIQUE) 1150 1151 if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False): 1152 self._advance() 1153 1154 properties = None 1155 create_token = self._match_set(self.CREATABLES) and self._prev 1156 1157 if not create_token: 1158 # exp.Properties.Location.POST_CREATE 1159 properties = self._parse_properties() 1160 create_token = self._match_set(self.CREATABLES) and self._prev 1161 1162 if not properties or not create_token: 1163 return self._parse_as_command(start) 1164 1165 exists = self._parse_exists(not_=True) 1166 this = None 1167 expression = None 1168 indexes = None 1169 no_schema_binding = None 1170 begin = None 1171 clone = None 1172 1173 def extend_props(temp_props: t.Optional[exp.Properties]) -> None: 1174 nonlocal properties 1175 if properties and temp_props: 1176 properties.expressions.extend(temp_props.expressions) 1177 elif temp_props: 1178 properties = temp_props 1179 1180 if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1181 this = 
self._parse_user_defined_function(kind=create_token.token_type) 1182 1183 # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature) 1184 extend_props(self._parse_properties()) 1185 1186 self._match(TokenType.ALIAS) 1187 begin = self._match(TokenType.BEGIN) 1188 return_ = self._match_text_seq("RETURN") 1189 expression = self._parse_statement() 1190 1191 if return_: 1192 expression = self.expression(exp.Return, this=expression) 1193 elif create_token.token_type == TokenType.INDEX: 1194 this = self._parse_index(index=self._parse_id_var()) 1195 elif create_token.token_type in self.DB_CREATABLES: 1196 table_parts = self._parse_table_parts(schema=True) 1197 1198 # exp.Properties.Location.POST_NAME 1199 self._match(TokenType.COMMA) 1200 extend_props(self._parse_properties(before=True)) 1201 1202 this = self._parse_schema(this=table_parts) 1203 1204 # exp.Properties.Location.POST_SCHEMA and POST_WITH 1205 extend_props(self._parse_properties()) 1206 1207 self._match(TokenType.ALIAS) 1208 if not self._match_set(self.DDL_SELECT_TOKENS, advance=False): 1209 # exp.Properties.Location.POST_ALIAS 1210 extend_props(self._parse_properties()) 1211 1212 expression = self._parse_ddl_select() 1213 1214 if create_token.token_type == TokenType.TABLE: 1215 indexes = [] 1216 while True: 1217 index = self._parse_index() 1218 1219 # exp.Properties.Location.POST_EXPRESSION and POST_INDEX 1220 extend_props(self._parse_properties()) 1221 1222 if not index: 1223 break 1224 else: 1225 self._match(TokenType.COMMA) 1226 indexes.append(index) 1227 elif create_token.token_type == TokenType.VIEW: 1228 if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"): 1229 no_schema_binding = True 1230 1231 if self._match_text_seq("CLONE"): 1232 clone = self._parse_table(schema=True) 1233 when = self._match_texts({"AT", "BEFORE"}) and self._prev.text.upper() 1234 clone_kind = ( 1235 self._match(TokenType.L_PAREN) 1236 and self._match_texts(self.CLONE_KINDS) 1237 and 
self._prev.text.upper() 1238 ) 1239 clone_expression = self._match(TokenType.FARROW) and self._parse_bitwise() 1240 self._match(TokenType.R_PAREN) 1241 clone = self.expression( 1242 exp.Clone, this=clone, when=when, kind=clone_kind, expression=clone_expression 1243 ) 1244 1245 return self.expression( 1246 exp.Create, 1247 this=this, 1248 kind=create_token.text, 1249 replace=replace, 1250 unique=unique, 1251 expression=expression, 1252 exists=exists, 1253 properties=properties, 1254 indexes=indexes, 1255 no_schema_binding=no_schema_binding, 1256 begin=begin, 1257 clone=clone, 1258 ) 1259 1260 def _parse_property_before(self) -> t.Optional[exp.Expression]: 1261 # only used for teradata currently 1262 self._match(TokenType.COMMA) 1263 1264 kwargs = { 1265 "no": self._match_text_seq("NO"), 1266 "dual": self._match_text_seq("DUAL"), 1267 "before": self._match_text_seq("BEFORE"), 1268 "default": self._match_text_seq("DEFAULT"), 1269 "local": (self._match_text_seq("LOCAL") and "LOCAL") 1270 or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"), 1271 "after": self._match_text_seq("AFTER"), 1272 "minimum": self._match_texts(("MIN", "MINIMUM")), 1273 "maximum": self._match_texts(("MAX", "MAXIMUM")), 1274 } 1275 1276 if self._match_texts(self.PROPERTY_PARSERS): 1277 parser = self.PROPERTY_PARSERS[self._prev.text.upper()] 1278 try: 1279 return parser(self, **{k: v for k, v in kwargs.items() if v}) 1280 except TypeError: 1281 self.raise_error(f"Cannot parse property '{self._prev.text}'") 1282 1283 return None 1284 1285 def _parse_property(self) -> t.Optional[exp.Expression]: 1286 if self._match_texts(self.PROPERTY_PARSERS): 1287 return self.PROPERTY_PARSERS[self._prev.text.upper()](self) 1288 1289 if self._match_pair(TokenType.DEFAULT, TokenType.CHARACTER_SET): 1290 return self._parse_character_set(default=True) 1291 1292 if self._match_text_seq("COMPOUND", "SORTKEY"): 1293 return self._parse_sortkey(compound=True) 1294 1295 if self._match_text_seq("SQL", "SECURITY"): 1296 
return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER")) 1297 1298 assignment = self._match_pair( 1299 TokenType.VAR, TokenType.EQ, advance=False 1300 ) or self._match_pair(TokenType.STRING, TokenType.EQ, advance=False) 1301 1302 if assignment: 1303 key = self._parse_var_or_string() 1304 self._match(TokenType.EQ) 1305 return self.expression(exp.Property, this=key, value=self._parse_column()) 1306 1307 return None 1308 1309 def _parse_stored(self) -> exp.FileFormatProperty: 1310 self._match(TokenType.ALIAS) 1311 1312 input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None 1313 output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None 1314 1315 return self.expression( 1316 exp.FileFormatProperty, 1317 this=self.expression( 1318 exp.InputOutputFormat, input_format=input_format, output_format=output_format 1319 ) 1320 if input_format or output_format 1321 else self._parse_var_or_string() or self._parse_number() or self._parse_id_var(), 1322 ) 1323 1324 def _parse_property_assignment(self, exp_class: t.Type[E]) -> E: 1325 self._match(TokenType.EQ) 1326 self._match(TokenType.ALIAS) 1327 return self.expression(exp_class, this=self._parse_field()) 1328 1329 def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]: 1330 properties = [] 1331 while True: 1332 if before: 1333 prop = self._parse_property_before() 1334 else: 1335 prop = self._parse_property() 1336 1337 if not prop: 1338 break 1339 for p in ensure_list(prop): 1340 properties.append(p) 1341 1342 if properties: 1343 return self.expression(exp.Properties, expressions=properties) 1344 1345 return None 1346 1347 def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty: 1348 return self.expression( 1349 exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION") 1350 ) 1351 1352 def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty: 1353 if 
self._index >= 2: 1354 pre_volatile_token = self._tokens[self._index - 2] 1355 else: 1356 pre_volatile_token = None 1357 1358 if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS: 1359 return exp.VolatileProperty() 1360 1361 return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE")) 1362 1363 def _parse_with_property( 1364 self, 1365 ) -> t.Optional[exp.Expression] | t.List[t.Optional[exp.Expression]]: 1366 self._match(TokenType.WITH) 1367 if self._match(TokenType.L_PAREN, advance=False): 1368 return self._parse_wrapped_csv(self._parse_property) 1369 1370 if self._match_text_seq("JOURNAL"): 1371 return self._parse_withjournaltable() 1372 1373 if self._match_text_seq("DATA"): 1374 return self._parse_withdata(no=False) 1375 elif self._match_text_seq("NO", "DATA"): 1376 return self._parse_withdata(no=True) 1377 1378 if not self._next: 1379 return None 1380 1381 return self._parse_withisolatedloading() 1382 1383 # https://dev.mysql.com/doc/refman/8.0/en/create-view.html 1384 def _parse_definer(self) -> t.Optional[exp.DefinerProperty]: 1385 self._match(TokenType.EQ) 1386 1387 user = self._parse_id_var() 1388 self._match(TokenType.PARAMETER) 1389 host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text) 1390 1391 if not user or not host: 1392 return None 1393 1394 return exp.DefinerProperty(this=f"{user}@{host}") 1395 1396 def _parse_withjournaltable(self) -> exp.WithJournalTableProperty: 1397 self._match(TokenType.TABLE) 1398 self._match(TokenType.EQ) 1399 return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts()) 1400 1401 def _parse_log(self, no: bool = False) -> exp.LogProperty: 1402 return self.expression(exp.LogProperty, no=no) 1403 1404 def _parse_journal(self, **kwargs) -> exp.JournalProperty: 1405 return self.expression(exp.JournalProperty, **kwargs) 1406 1407 def _parse_checksum(self) -> exp.ChecksumProperty: 1408 self._match(TokenType.EQ) 1409 1410 on = None 
1411 if self._match(TokenType.ON): 1412 on = True 1413 elif self._match_text_seq("OFF"): 1414 on = False 1415 1416 return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT)) 1417 1418 def _parse_cluster(self) -> t.Optional[exp.Cluster]: 1419 if not self._match_text_seq("BY"): 1420 self._retreat(self._index - 1) 1421 return None 1422 1423 return self.expression(exp.Cluster, expressions=self._parse_csv(self._parse_ordered)) 1424 1425 def _parse_freespace(self) -> exp.FreespaceProperty: 1426 self._match(TokenType.EQ) 1427 return self.expression( 1428 exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT) 1429 ) 1430 1431 def _parse_mergeblockratio( 1432 self, no: bool = False, default: bool = False 1433 ) -> exp.MergeBlockRatioProperty: 1434 if self._match(TokenType.EQ): 1435 return self.expression( 1436 exp.MergeBlockRatioProperty, 1437 this=self._parse_number(), 1438 percent=self._match(TokenType.PERCENT), 1439 ) 1440 1441 return self.expression(exp.MergeBlockRatioProperty, no=no, default=default) 1442 1443 def _parse_datablocksize( 1444 self, 1445 default: t.Optional[bool] = None, 1446 minimum: t.Optional[bool] = None, 1447 maximum: t.Optional[bool] = None, 1448 ) -> exp.DataBlocksizeProperty: 1449 self._match(TokenType.EQ) 1450 size = self._parse_number() 1451 1452 units = None 1453 if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")): 1454 units = self._prev.text 1455 1456 return self.expression( 1457 exp.DataBlocksizeProperty, 1458 size=size, 1459 units=units, 1460 default=default, 1461 minimum=minimum, 1462 maximum=maximum, 1463 ) 1464 1465 def _parse_blockcompression(self) -> exp.BlockCompressionProperty: 1466 self._match(TokenType.EQ) 1467 always = self._match_text_seq("ALWAYS") 1468 manual = self._match_text_seq("MANUAL") 1469 never = self._match_text_seq("NEVER") 1470 default = self._match_text_seq("DEFAULT") 1471 1472 autotemp = None 1473 if self._match_text_seq("AUTOTEMP"): 1474 autotemp 
= self._parse_schema() 1475 1476 return self.expression( 1477 exp.BlockCompressionProperty, 1478 always=always, 1479 manual=manual, 1480 never=never, 1481 default=default, 1482 autotemp=autotemp, 1483 ) 1484 1485 def _parse_withisolatedloading(self) -> exp.IsolatedLoadingProperty: 1486 no = self._match_text_seq("NO") 1487 concurrent = self._match_text_seq("CONCURRENT") 1488 self._match_text_seq("ISOLATED", "LOADING") 1489 for_all = self._match_text_seq("FOR", "ALL") 1490 for_insert = self._match_text_seq("FOR", "INSERT") 1491 for_none = self._match_text_seq("FOR", "NONE") 1492 return self.expression( 1493 exp.IsolatedLoadingProperty, 1494 no=no, 1495 concurrent=concurrent, 1496 for_all=for_all, 1497 for_insert=for_insert, 1498 for_none=for_none, 1499 ) 1500 1501 def _parse_locking(self) -> exp.LockingProperty: 1502 if self._match(TokenType.TABLE): 1503 kind = "TABLE" 1504 elif self._match(TokenType.VIEW): 1505 kind = "VIEW" 1506 elif self._match(TokenType.ROW): 1507 kind = "ROW" 1508 elif self._match_text_seq("DATABASE"): 1509 kind = "DATABASE" 1510 else: 1511 kind = None 1512 1513 if kind in ("DATABASE", "TABLE", "VIEW"): 1514 this = self._parse_table_parts() 1515 else: 1516 this = None 1517 1518 if self._match(TokenType.FOR): 1519 for_or_in = "FOR" 1520 elif self._match(TokenType.IN): 1521 for_or_in = "IN" 1522 else: 1523 for_or_in = None 1524 1525 if self._match_text_seq("ACCESS"): 1526 lock_type = "ACCESS" 1527 elif self._match_texts(("EXCL", "EXCLUSIVE")): 1528 lock_type = "EXCLUSIVE" 1529 elif self._match_text_seq("SHARE"): 1530 lock_type = "SHARE" 1531 elif self._match_text_seq("READ"): 1532 lock_type = "READ" 1533 elif self._match_text_seq("WRITE"): 1534 lock_type = "WRITE" 1535 elif self._match_text_seq("CHECKSUM"): 1536 lock_type = "CHECKSUM" 1537 else: 1538 lock_type = None 1539 1540 override = self._match_text_seq("OVERRIDE") 1541 1542 return self.expression( 1543 exp.LockingProperty, 1544 this=this, 1545 kind=kind, 1546 for_or_in=for_or_in, 1547 
lock_type=lock_type, 1548 override=override, 1549 ) 1550 1551 def _parse_partition_by(self) -> t.List[t.Optional[exp.Expression]]: 1552 if self._match(TokenType.PARTITION_BY): 1553 return self._parse_csv(self._parse_conjunction) 1554 return [] 1555 1556 def _parse_partitioned_by(self) -> exp.PartitionedByProperty: 1557 self._match(TokenType.EQ) 1558 return self.expression( 1559 exp.PartitionedByProperty, 1560 this=self._parse_schema() or self._parse_bracket(self._parse_field()), 1561 ) 1562 1563 def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty: 1564 if self._match_text_seq("AND", "STATISTICS"): 1565 statistics = True 1566 elif self._match_text_seq("AND", "NO", "STATISTICS"): 1567 statistics = False 1568 else: 1569 statistics = None 1570 1571 return self.expression(exp.WithDataProperty, no=no, statistics=statistics) 1572 1573 def _parse_no_property(self) -> t.Optional[exp.NoPrimaryIndexProperty]: 1574 if self._match_text_seq("PRIMARY", "INDEX"): 1575 return exp.NoPrimaryIndexProperty() 1576 return None 1577 1578 def _parse_on_property(self) -> t.Optional[exp.Expression]: 1579 if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"): 1580 return exp.OnCommitProperty() 1581 elif self._match_text_seq("COMMIT", "DELETE", "ROWS"): 1582 return exp.OnCommitProperty(delete=True) 1583 return None 1584 1585 def _parse_distkey(self) -> exp.DistKeyProperty: 1586 return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var)) 1587 1588 def _parse_create_like(self) -> t.Optional[exp.LikeProperty]: 1589 table = self._parse_table(schema=True) 1590 1591 options = [] 1592 while self._match_texts(("INCLUDING", "EXCLUDING")): 1593 this = self._prev.text.upper() 1594 1595 id_var = self._parse_id_var() 1596 if not id_var: 1597 return None 1598 1599 options.append( 1600 self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper())) 1601 ) 1602 1603 return self.expression(exp.LikeProperty, this=table, expressions=options) 1604 1605 
def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty: 1606 return self.expression( 1607 exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound 1608 ) 1609 1610 def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty: 1611 self._match(TokenType.EQ) 1612 return self.expression( 1613 exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default 1614 ) 1615 1616 def _parse_returns(self) -> exp.ReturnsProperty: 1617 value: t.Optional[exp.Expression] 1618 is_table = self._match(TokenType.TABLE) 1619 1620 if is_table: 1621 if self._match(TokenType.LT): 1622 value = self.expression( 1623 exp.Schema, 1624 this="TABLE", 1625 expressions=self._parse_csv(self._parse_struct_types), 1626 ) 1627 if not self._match(TokenType.GT): 1628 self.raise_error("Expecting >") 1629 else: 1630 value = self._parse_schema(exp.var("TABLE")) 1631 else: 1632 value = self._parse_types() 1633 1634 return self.expression(exp.ReturnsProperty, this=value, is_table=is_table) 1635 1636 def _parse_describe(self) -> exp.Describe: 1637 kind = self._match_set(self.CREATABLES) and self._prev.text 1638 this = self._parse_table() 1639 return self.expression(exp.Describe, this=this, kind=kind) 1640 1641 def _parse_insert(self) -> exp.Insert: 1642 overwrite = self._match(TokenType.OVERWRITE) 1643 local = self._match_text_seq("LOCAL") 1644 alternative = None 1645 1646 if self._match_text_seq("DIRECTORY"): 1647 this: t.Optional[exp.Expression] = self.expression( 1648 exp.Directory, 1649 this=self._parse_var_or_string(), 1650 local=local, 1651 row_format=self._parse_row_format(match_row=True), 1652 ) 1653 else: 1654 if self._match(TokenType.OR): 1655 alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text 1656 1657 self._match(TokenType.INTO) 1658 self._match(TokenType.TABLE) 1659 this = self._parse_table(schema=True) 1660 1661 return self.expression( 1662 exp.Insert, 1663 this=this, 1664 exists=self._parse_exists(), 
1665 partition=self._parse_partition(), 1666 expression=self._parse_ddl_select(), 1667 conflict=self._parse_on_conflict(), 1668 returning=self._parse_returning(), 1669 overwrite=overwrite, 1670 alternative=alternative, 1671 ) 1672 1673 def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]: 1674 conflict = self._match_text_seq("ON", "CONFLICT") 1675 duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY") 1676 1677 if not conflict and not duplicate: 1678 return None 1679 1680 nothing = None 1681 expressions = None 1682 key = None 1683 constraint = None 1684 1685 if conflict: 1686 if self._match_text_seq("ON", "CONSTRAINT"): 1687 constraint = self._parse_id_var() 1688 else: 1689 key = self._parse_csv(self._parse_value) 1690 1691 self._match_text_seq("DO") 1692 if self._match_text_seq("NOTHING"): 1693 nothing = True 1694 else: 1695 self._match(TokenType.UPDATE) 1696 expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality) 1697 1698 return self.expression( 1699 exp.OnConflict, 1700 duplicate=duplicate, 1701 expressions=expressions, 1702 nothing=nothing, 1703 key=key, 1704 constraint=constraint, 1705 ) 1706 1707 def _parse_returning(self) -> t.Optional[exp.Returning]: 1708 if not self._match(TokenType.RETURNING): 1709 return None 1710 1711 return self.expression(exp.Returning, expressions=self._parse_csv(self._parse_column)) 1712 1713 def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 1714 if not self._match(TokenType.FORMAT): 1715 return None 1716 return self._parse_row_format() 1717 1718 def _parse_row_format( 1719 self, match_row: bool = False 1720 ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 1721 if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT): 1722 return None 1723 1724 if self._match_text_seq("SERDE"): 1725 return self.expression(exp.RowFormatSerdeProperty, this=self._parse_string()) 1726 1727 self._match_text_seq("DELIMITED") 1728 
1729 kwargs = {} 1730 1731 if self._match_text_seq("FIELDS", "TERMINATED", "BY"): 1732 kwargs["fields"] = self._parse_string() 1733 if self._match_text_seq("ESCAPED", "BY"): 1734 kwargs["escaped"] = self._parse_string() 1735 if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"): 1736 kwargs["collection_items"] = self._parse_string() 1737 if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"): 1738 kwargs["map_keys"] = self._parse_string() 1739 if self._match_text_seq("LINES", "TERMINATED", "BY"): 1740 kwargs["lines"] = self._parse_string() 1741 if self._match_text_seq("NULL", "DEFINED", "AS"): 1742 kwargs["null"] = self._parse_string() 1743 1744 return self.expression(exp.RowFormatDelimitedProperty, **kwargs) # type: ignore 1745 1746 def _parse_load(self) -> exp.LoadData | exp.Command: 1747 if self._match_text_seq("DATA"): 1748 local = self._match_text_seq("LOCAL") 1749 self._match_text_seq("INPATH") 1750 inpath = self._parse_string() 1751 overwrite = self._match(TokenType.OVERWRITE) 1752 self._match_pair(TokenType.INTO, TokenType.TABLE) 1753 1754 return self.expression( 1755 exp.LoadData, 1756 this=self._parse_table(schema=True), 1757 local=local, 1758 overwrite=overwrite, 1759 inpath=inpath, 1760 partition=self._parse_partition(), 1761 input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(), 1762 serde=self._match_text_seq("SERDE") and self._parse_string(), 1763 ) 1764 return self._parse_as_command(self._prev) 1765 1766 def _parse_delete(self) -> exp.Delete: 1767 self._match(TokenType.FROM) 1768 1769 return self.expression( 1770 exp.Delete, 1771 this=self._parse_table(), 1772 using=self._parse_csv(lambda: self._match(TokenType.USING) and self._parse_table()), 1773 where=self._parse_where(), 1774 returning=self._parse_returning(), 1775 ) 1776 1777 def _parse_update(self) -> exp.Update: 1778 return self.expression( 1779 exp.Update, 1780 **{ # type: ignore 1781 "this": self._parse_table(alias_tokens=self.UPDATE_ALIAS_TOKENS), 1782 
"expressions": self._match(TokenType.SET) and self._parse_csv(self._parse_equality), 1783 "from": self._parse_from(modifiers=True), 1784 "where": self._parse_where(), 1785 "returning": self._parse_returning(), 1786 }, 1787 ) 1788 1789 def _parse_uncache(self) -> exp.Uncache: 1790 if not self._match(TokenType.TABLE): 1791 self.raise_error("Expecting TABLE after UNCACHE") 1792 1793 return self.expression( 1794 exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True) 1795 ) 1796 1797 def _parse_cache(self) -> exp.Cache: 1798 lazy = self._match_text_seq("LAZY") 1799 self._match(TokenType.TABLE) 1800 table = self._parse_table(schema=True) 1801 1802 options = [] 1803 if self._match_text_seq("OPTIONS"): 1804 self._match_l_paren() 1805 k = self._parse_string() 1806 self._match(TokenType.EQ) 1807 v = self._parse_string() 1808 options = [k, v] 1809 self._match_r_paren() 1810 1811 self._match(TokenType.ALIAS) 1812 return self.expression( 1813 exp.Cache, 1814 this=table, 1815 lazy=lazy, 1816 options=options, 1817 expression=self._parse_select(nested=True), 1818 ) 1819 1820 def _parse_partition(self) -> t.Optional[exp.Partition]: 1821 if not self._match(TokenType.PARTITION): 1822 return None 1823 1824 return self.expression( 1825 exp.Partition, expressions=self._parse_wrapped_csv(self._parse_conjunction) 1826 ) 1827 1828 def _parse_value(self) -> exp.Tuple: 1829 if self._match(TokenType.L_PAREN): 1830 expressions = self._parse_csv(self._parse_conjunction) 1831 self._match_r_paren() 1832 return self.expression(exp.Tuple, expressions=expressions) 1833 1834 # In presto we can have VALUES 1, 2 which results in 1 column & 2 rows. 
1835 # Source: https://prestodb.io/docs/current/sql/values.html 1836 return self.expression(exp.Tuple, expressions=[self._parse_conjunction()]) 1837 1838 def _parse_select( 1839 self, nested: bool = False, table: bool = False, parse_subquery_alias: bool = True 1840 ) -> t.Optional[exp.Expression]: 1841 cte = self._parse_with() 1842 if cte: 1843 this = self._parse_statement() 1844 1845 if not this: 1846 self.raise_error("Failed to parse any statement following CTE") 1847 return cte 1848 1849 if "with" in this.arg_types: 1850 this.set("with", cte) 1851 else: 1852 self.raise_error(f"{this.key} does not support CTE") 1853 this = cte 1854 elif self._match(TokenType.SELECT): 1855 comments = self._prev_comments 1856 1857 hint = self._parse_hint() 1858 all_ = self._match(TokenType.ALL) 1859 distinct = self._match(TokenType.DISTINCT) 1860 1861 kind = ( 1862 self._match(TokenType.ALIAS) 1863 and self._match_texts(("STRUCT", "VALUE")) 1864 and self._prev.text 1865 ) 1866 1867 if distinct: 1868 distinct = self.expression( 1869 exp.Distinct, 1870 on=self._parse_value() if self._match(TokenType.ON) else None, 1871 ) 1872 1873 if all_ and distinct: 1874 self.raise_error("Cannot specify both ALL and DISTINCT after SELECT") 1875 1876 limit = self._parse_limit(top=True) 1877 expressions = self._parse_csv(self._parse_expression) 1878 1879 this = self.expression( 1880 exp.Select, 1881 kind=kind, 1882 hint=hint, 1883 distinct=distinct, 1884 expressions=expressions, 1885 limit=limit, 1886 ) 1887 this.comments = comments 1888 1889 into = self._parse_into() 1890 if into: 1891 this.set("into", into) 1892 1893 from_ = self._parse_from() 1894 if from_: 1895 this.set("from", from_) 1896 1897 this = self._parse_query_modifiers(this) 1898 elif (table or nested) and self._match(TokenType.L_PAREN): 1899 if self._match(TokenType.PIVOT): 1900 this = self._parse_simplified_pivot() 1901 elif self._match(TokenType.FROM): 1902 this = exp.select("*").from_( 1903 t.cast(exp.From, 
self._parse_from(skip_from_token=True)) 1904 ) 1905 else: 1906 this = self._parse_table() if table else self._parse_select(nested=True) 1907 this = self._parse_set_operations(self._parse_query_modifiers(this)) 1908 1909 self._match_r_paren() 1910 1911 # early return so that subquery unions aren't parsed again 1912 # SELECT * FROM (SELECT 1) UNION ALL SELECT 1 1913 # Union ALL should be a property of the top select node, not the subquery 1914 return self._parse_subquery(this, parse_alias=parse_subquery_alias) 1915 elif self._match(TokenType.VALUES): 1916 this = self.expression( 1917 exp.Values, 1918 expressions=self._parse_csv(self._parse_value), 1919 alias=self._parse_table_alias(), 1920 ) 1921 else: 1922 this = None 1923 1924 return self._parse_set_operations(this) 1925 1926 def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]: 1927 if not skip_with_token and not self._match(TokenType.WITH): 1928 return None 1929 1930 comments = self._prev_comments 1931 recursive = self._match(TokenType.RECURSIVE) 1932 1933 expressions = [] 1934 while True: 1935 expressions.append(self._parse_cte()) 1936 1937 if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH): 1938 break 1939 else: 1940 self._match(TokenType.WITH) 1941 1942 return self.expression( 1943 exp.With, comments=comments, expressions=expressions, recursive=recursive 1944 ) 1945 1946 def _parse_cte(self) -> exp.CTE: 1947 alias = self._parse_table_alias() 1948 if not alias or not alias.this: 1949 self.raise_error("Expected CTE to have alias") 1950 1951 self._match(TokenType.ALIAS) 1952 return self.expression( 1953 exp.CTE, this=self._parse_wrapped(self._parse_statement), alias=alias 1954 ) 1955 1956 def _parse_table_alias( 1957 self, alias_tokens: t.Optional[t.Collection[TokenType]] = None 1958 ) -> t.Optional[exp.TableAlias]: 1959 any_token = self._match(TokenType.ALIAS) 1960 alias = ( 1961 self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 
1962 or self._parse_string_as_identifier() 1963 ) 1964 1965 index = self._index 1966 if self._match(TokenType.L_PAREN): 1967 columns = self._parse_csv(self._parse_function_parameter) 1968 self._match_r_paren() if columns else self._retreat(index) 1969 else: 1970 columns = None 1971 1972 if not alias and not columns: 1973 return None 1974 1975 return self.expression(exp.TableAlias, this=alias, columns=columns) 1976 1977 def _parse_subquery( 1978 self, this: t.Optional[exp.Expression], parse_alias: bool = True 1979 ) -> t.Optional[exp.Subquery]: 1980 if not this: 1981 return None 1982 1983 return self.expression( 1984 exp.Subquery, 1985 this=this, 1986 pivots=self._parse_pivots(), 1987 alias=self._parse_table_alias() if parse_alias else None, 1988 ) 1989 1990 def _parse_query_modifiers( 1991 self, this: t.Optional[exp.Expression] 1992 ) -> t.Optional[exp.Expression]: 1993 if isinstance(this, self.MODIFIABLES): 1994 for key, parser in self.QUERY_MODIFIER_PARSERS.items(): 1995 expression = parser(self) 1996 1997 if expression: 1998 this.set(key, expression) 1999 return this 2000 2001 def _parse_hint(self) -> t.Optional[exp.Hint]: 2002 if self._match(TokenType.HINT): 2003 hints = self._parse_csv(self._parse_function) 2004 2005 if not self._match_pair(TokenType.STAR, TokenType.SLASH): 2006 self.raise_error("Expected */ after HINT") 2007 2008 return self.expression(exp.Hint, expressions=hints) 2009 2010 return None 2011 2012 def _parse_into(self) -> t.Optional[exp.Into]: 2013 if not self._match(TokenType.INTO): 2014 return None 2015 2016 temp = self._match(TokenType.TEMPORARY) 2017 unlogged = self._match_text_seq("UNLOGGED") 2018 self._match(TokenType.TABLE) 2019 2020 return self.expression( 2021 exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged 2022 ) 2023 2024 def _parse_from( 2025 self, modifiers: bool = False, skip_from_token: bool = False 2026 ) -> t.Optional[exp.From]: 2027 if not skip_from_token and not self._match(TokenType.FROM): 
2028 return None 2029 2030 comments = self._prev_comments 2031 this = self._parse_table() 2032 2033 return self.expression( 2034 exp.From, 2035 comments=comments, 2036 this=self._parse_query_modifiers(this) if modifiers else this, 2037 ) 2038 2039 def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]: 2040 if not self._match(TokenType.MATCH_RECOGNIZE): 2041 return None 2042 2043 self._match_l_paren() 2044 2045 partition = self._parse_partition_by() 2046 order = self._parse_order() 2047 measures = ( 2048 self._parse_csv(self._parse_expression) if self._match_text_seq("MEASURES") else None 2049 ) 2050 2051 if self._match_text_seq("ONE", "ROW", "PER", "MATCH"): 2052 rows = exp.var("ONE ROW PER MATCH") 2053 elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"): 2054 text = "ALL ROWS PER MATCH" 2055 if self._match_text_seq("SHOW", "EMPTY", "MATCHES"): 2056 text += f" SHOW EMPTY MATCHES" 2057 elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"): 2058 text += f" OMIT EMPTY MATCHES" 2059 elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"): 2060 text += f" WITH UNMATCHED ROWS" 2061 rows = exp.var(text) 2062 else: 2063 rows = None 2064 2065 if self._match_text_seq("AFTER", "MATCH", "SKIP"): 2066 text = "AFTER MATCH SKIP" 2067 if self._match_text_seq("PAST", "LAST", "ROW"): 2068 text += f" PAST LAST ROW" 2069 elif self._match_text_seq("TO", "NEXT", "ROW"): 2070 text += f" TO NEXT ROW" 2071 elif self._match_text_seq("TO", "FIRST"): 2072 text += f" TO FIRST {self._advance_any().text}" # type: ignore 2073 elif self._match_text_seq("TO", "LAST"): 2074 text += f" TO LAST {self._advance_any().text}" # type: ignore 2075 after = exp.var(text) 2076 else: 2077 after = None 2078 2079 if self._match_text_seq("PATTERN"): 2080 self._match_l_paren() 2081 2082 if not self._curr: 2083 self.raise_error("Expecting )", self._curr) 2084 2085 paren = 1 2086 start = self._curr 2087 2088 while self._curr and paren > 0: 2089 if self._curr.token_type == TokenType.L_PAREN: 2090 
                    paren += 1
                if self._curr.token_type == TokenType.R_PAREN:
                    paren -= 1

                end = self._prev
                self._advance()

            if paren > 0:
                self.raise_error("Expecting )", self._curr)

            # Recover the raw SQL text between the pattern's parentheses.
            pattern = exp.var(self._find_sql(start, end))
        else:
            pattern = None

        define = (
            self._parse_csv(
                lambda: self.expression(
                    exp.Alias,
                    alias=self._parse_id_var(any_token=True),
                    this=self._match(TokenType.ALIAS) and self._parse_conjunction(),
                )
            )
            if self._match_text_seq("DEFINE")
            else None
        )

        self._match_r_paren()

        return self.expression(
            exp.MatchRecognize,
            partition_by=partition,
            order=order,
            measures=measures,
            rows=rows,
            after=after,
            pattern=pattern,
            define=define,
            alias=self._parse_table_alias(),
        )

    def _parse_lateral(self) -> t.Optional[exp.Lateral]:
        """Parse LATERAL / CROSS APPLY / OUTER APPLY into an `exp.Lateral`, or return None."""
        outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY)
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY)

        if outer_apply or cross_apply:
            this = self._parse_select(table=True)
            view = None
            outer = not cross_apply
        elif self._match(TokenType.LATERAL):
            this = self._parse_select(table=True)
            view = self._match(TokenType.VIEW)
            outer = self._match(TokenType.OUTER)
        else:
            return None

        if not this:
            # Not a subquery: parse a (possibly dotted) function call or identifier.
            this = self._parse_function() or self._parse_id_var(any_token=False)
            while self._match(TokenType.DOT):
                this = exp.Dot(
                    this=this,
                    expression=self._parse_function() or self._parse_id_var(any_token=False),
                )

        if view:
            # LATERAL VIEW form: `table [AS col1, col2, ...]`.
            table = self._parse_id_var(any_token=False)
            columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else []
            table_alias: t.Optional[exp.TableAlias] = self.expression(
                exp.TableAlias, this=table, columns=columns
            )
        else:
            table_alias = self._parse_table_alias()

        return self.expression(exp.Lateral, this=this, view=view, outer=outer, alias=table_alias)

    def _parse_join_parts(
        self,
    ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]:
        # Returns (method, side, kind) tokens; each component may be None.
        return (
            self._match_set(self.JOIN_METHODS) and self._prev,
            self._match_set(self.JOIN_SIDES) and self._prev,
            self._match_set(self.JOIN_KINDS) and self._prev,
        )

    def _parse_join(self, skip_join_token: bool = False) -> t.Optional[exp.Join]:
        """Parse a JOIN (or comma join / APPLY form) into an `exp.Join`, or return None."""
        if self._match(TokenType.COMMA):
            return self.expression(exp.Join, this=self._parse_table())

        index = self._index
        method, side, kind = self._parse_join_parts()
        hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None
        join = self._match(TokenType.JOIN)

        if not skip_join_token and not join:
            # Not actually a join; undo any join-part tokens we consumed.
            self._retreat(index)
            kind = None
            method = None
            side = None

        outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False)
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False)

        if not skip_join_token and not join and not outer_apply and not cross_apply:
            return None

        if outer_apply:
            # OUTER APPLY is represented as a LEFT-side join.
            side = Token(TokenType.LEFT, "LEFT")

        kwargs: t.Dict[str, t.Any] = {"this": self._parse_table()}

        if method:
            kwargs["method"] = method.text
        if side:
            kwargs["side"] = side.text
        if kind:
            kwargs["kind"] = kind.text
        if hint:
            kwargs["hint"] = hint

        if self._match(TokenType.ON):
            kwargs["on"] = self._parse_conjunction()
        elif self._match(TokenType.USING):
            kwargs["using"] = self._parse_wrapped_id_vars()

        return self.expression(exp.Join, **kwargs)

    def _parse_index(
        self,
        index: t.Optional[exp.Expression] = None,
    ) -> t.Optional[exp.Index]:
        """Parse an index definition; with `index` given, parse its ON <table> target instead."""
        if index:
            unique = None
            primary = None
            amp = None

            self._match(TokenType.ON)
            self._match(TokenType.TABLE)  # hive
            table = self._parse_table_parts(schema=True)
        else:
            unique = self._match(TokenType.UNIQUE)
            primary = self._match_text_seq("PRIMARY")
            amp = self._match_text_seq("AMP")

            if not self._match(TokenType.INDEX):
                return None

            index = self._parse_id_var()
            table = None

        using = self._parse_field() if self._match(TokenType.USING) else None

        if self._match(TokenType.L_PAREN, advance=False):
            columns = self._parse_wrapped_csv(self._parse_ordered)
        else:
            columns = None

        return self.expression(
            exp.Index,
            this=index,
            table=table,
            using=using,
            columns=columns,
            unique=unique,
            primary=primary,
            amp=amp,
            partition_by=self._parse_partition_by(),
        )

    def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]:
        # One component of a dotted table name; function calls are allowed
        # unless a pure schema name is required.
        return (
            (not schema and self._parse_function())
            or self._parse_id_var(any_token=False)
            or self._parse_string_as_identifier()
            or self._parse_placeholder()
        )

    def _parse_table_parts(self, schema: bool = False) -> exp.Table:
        """Parse a dotted table reference: [catalog.][db.]table[.more...]."""
        catalog = None
        db = None
        table = self._parse_table_part(schema=schema)

        while self._match(TokenType.DOT):
            if catalog:
                # This allows nesting the table in arbitrarily many dot expressions if needed
                table = self.expression(
                    exp.Dot, this=table, expression=self._parse_table_part(schema=schema)
                )
            else:
                catalog = db
                db = table
                table = self._parse_table_part(schema=schema)

        if not table:
            self.raise_error(f"Expected table name but got {self._curr}")

        return self.expression(
            exp.Table, this=table, db=db, catalog=catalog, pivots=self._parse_pivots()
        )

    def _parse_table(
        self, schema: bool = False, alias_tokens: t.Optional[t.Collection[TokenType]] = None
    ) -> t.Optional[exp.Expression]:
        # Lateral/unnest/values/subquery forms take precedence over plain names.
        lateral = self._parse_lateral()
        if lateral:
            return lateral
2294 2295 unnest = self._parse_unnest() 2296 if unnest: 2297 return unnest 2298 2299 values = self._parse_derived_table_values() 2300 if values: 2301 return values 2302 2303 subquery = self._parse_select(table=True) 2304 if subquery: 2305 if not subquery.args.get("pivots"): 2306 subquery.set("pivots", self._parse_pivots()) 2307 return subquery 2308 2309 this: exp.Expression = self._parse_table_parts(schema=schema) 2310 2311 if schema: 2312 return self._parse_schema(this=this) 2313 2314 if self.ALIAS_POST_TABLESAMPLE: 2315 table_sample = self._parse_table_sample() 2316 2317 alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 2318 if alias: 2319 this.set("alias", alias) 2320 2321 if not this.args.get("pivots"): 2322 this.set("pivots", self._parse_pivots()) 2323 2324 if self._match_pair(TokenType.WITH, TokenType.L_PAREN): 2325 this.set( 2326 "hints", 2327 self._parse_csv(lambda: self._parse_function() or self._parse_var(any_token=True)), 2328 ) 2329 self._match_r_paren() 2330 2331 if not self.ALIAS_POST_TABLESAMPLE: 2332 table_sample = self._parse_table_sample() 2333 2334 if table_sample: 2335 table_sample.set("this", this) 2336 this = table_sample 2337 2338 return this 2339 2340 def _parse_unnest(self) -> t.Optional[exp.Unnest]: 2341 if not self._match(TokenType.UNNEST): 2342 return None 2343 2344 expressions = self._parse_wrapped_csv(self._parse_type) 2345 ordinality = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 2346 alias = self._parse_table_alias() 2347 2348 if alias and self.UNNEST_COLUMN_ONLY: 2349 if alias.args.get("columns"): 2350 self.raise_error("Unexpected extra column alias in unnest.") 2351 2352 alias.set("columns", [alias.this]) 2353 alias.set("this", None) 2354 2355 offset = None 2356 if self._match_pair(TokenType.WITH, TokenType.OFFSET): 2357 self._match(TokenType.ALIAS) 2358 offset = self._parse_id_var() or exp.to_identifier("offset") 2359 2360 return self.expression( 2361 exp.Unnest, 
expressions=expressions, ordinality=ordinality, alias=alias, offset=offset 2362 ) 2363 2364 def _parse_derived_table_values(self) -> t.Optional[exp.Values]: 2365 is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES) 2366 if not is_derived and not self._match(TokenType.VALUES): 2367 return None 2368 2369 expressions = self._parse_csv(self._parse_value) 2370 2371 if is_derived: 2372 self._match_r_paren() 2373 2374 return self.expression(exp.Values, expressions=expressions, alias=self._parse_table_alias()) 2375 2376 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 2377 if not self._match(TokenType.TABLE_SAMPLE) and not ( 2378 as_modifier and self._match_text_seq("USING", "SAMPLE") 2379 ): 2380 return None 2381 2382 bucket_numerator = None 2383 bucket_denominator = None 2384 bucket_field = None 2385 percent = None 2386 rows = None 2387 size = None 2388 seed = None 2389 2390 kind = ( 2391 self._prev.text if self._prev.token_type == TokenType.TABLE_SAMPLE else "USING SAMPLE" 2392 ) 2393 method = self._parse_var(tokens=(TokenType.ROW,)) 2394 2395 self._match(TokenType.L_PAREN) 2396 2397 num = self._parse_number() 2398 2399 if self._match_text_seq("BUCKET"): 2400 bucket_numerator = self._parse_number() 2401 self._match_text_seq("OUT", "OF") 2402 bucket_denominator = bucket_denominator = self._parse_number() 2403 self._match(TokenType.ON) 2404 bucket_field = self._parse_field() 2405 elif self._match_set((TokenType.PERCENT, TokenType.MOD)): 2406 percent = num 2407 elif self._match(TokenType.ROWS): 2408 rows = num 2409 else: 2410 size = num 2411 2412 self._match(TokenType.R_PAREN) 2413 2414 if self._match(TokenType.L_PAREN): 2415 method = self._parse_var() 2416 seed = self._match(TokenType.COMMA) and self._parse_number() 2417 self._match_r_paren() 2418 elif self._match_texts(("SEED", "REPEATABLE")): 2419 seed = self._parse_wrapped(self._parse_number) 2420 2421 return self.expression( 2422 exp.TableSample, 2423 
            method=method,
            bucket_numerator=bucket_numerator,
            bucket_denominator=bucket_denominator,
            bucket_field=bucket_field,
            percent=percent,
            rows=rows,
            size=size,
            seed=seed,
            kind=kind,
        )

    def _parse_pivots(self) -> t.List[t.Optional[exp.Expression]]:
        # Collect consecutive PIVOT/UNPIVOT clauses until one fails to parse.
        return list(iter(self._parse_pivot, None))

    # https://duckdb.org/docs/sql/statements/pivot
    def _parse_simplified_pivot(self) -> exp.Pivot:
        def _parse_on() -> t.Optional[exp.Expression]:
            # One ON entry: an expression, optionally followed by IN (...).
            this = self._parse_bitwise()
            return self._parse_in(this) if self._match(TokenType.IN) else this

        this = self._parse_table()
        expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on)
        using = self._match(TokenType.USING) and self._parse_csv(
            lambda: self._parse_alias(self._parse_function())
        )
        group = self._parse_group()
        return self.expression(
            exp.Pivot, this=this, expressions=expressions, using=using, group=group
        )

    def _parse_pivot(self) -> t.Optional[exp.Pivot]:
        """Parse a PIVOT/UNPIVOT(...) clause, or return None with no tokens consumed."""
        index = self._index

        if self._match(TokenType.PIVOT):
            unpivot = False
        elif self._match(TokenType.UNPIVOT):
            unpivot = True
        else:
            return None

        expressions = []
        field = None

        if not self._match(TokenType.L_PAREN):
            self._retreat(index)
            return None

        if unpivot:
            expressions = self._parse_csv(self._parse_column)
        else:
            expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function()))

        if not expressions:
            self.raise_error("Failed to parse PIVOT's aggregation list")

        if not self._match(TokenType.FOR):
            self.raise_error("Expecting FOR")

        value = self._parse_column()

        if not self._match(TokenType.IN):
            self.raise_error("Expecting IN")

        field = self._parse_in(value, alias=True)

        self._match_r_paren()

        pivot = self.expression(exp.Pivot, expressions=expressions, field=field, unpivot=unpivot)

        if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False):
            pivot.set("alias", self._parse_table_alias())

        if not unpivot:
            # Synthesize the output column names from the aggregations and IN values.
            names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions))

            columns: t.List[exp.Expression] = []
            for fld in pivot.args["field"].expressions:
                field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name
                for name in names:
                    if self.PREFIXED_PIVOT_COLUMNS:
                        name = f"{name}_{field_name}" if name else field_name
                    else:
                        name = f"{field_name}_{name}" if name else field_name

                    columns.append(exp.to_identifier(name))

            pivot.set("columns", columns)

        return pivot

    def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]:
        # Default naming: one output column per aggregation, from its alias.
        return [agg.alias for agg in aggregations]

    def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]:
        if not skip_where_token and not self._match(TokenType.WHERE):
            return None

        return self.expression(
            exp.Where, comments=self._prev_comments, this=self._parse_conjunction()
        )

    def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]:
        """Parse GROUP BY with any mix of expressions, GROUPING SETS, ROLLUP, CUBE, TOTALS."""
        if not skip_group_by_token and not self._match(TokenType.GROUP_BY):
            return None

        elements = defaultdict(list)

        while True:
            expressions = self._parse_csv(self._parse_conjunction)
            if expressions:
                elements["expressions"].extend(expressions)

            grouping_sets = self._parse_grouping_sets()
            if grouping_sets:
                elements["grouping_sets"].extend(grouping_sets)

            rollup = None
            cube = None
            totals = None

            # The WITH ROLLUP / WITH CUBE form has no column list (stored as True).
            with_ = self._match(TokenType.WITH)
            if self._match(TokenType.ROLLUP):
                rollup = with_ or self._parse_wrapped_csv(self._parse_column)
                elements["rollup"].extend(ensure_list(rollup))

            if self._match(TokenType.CUBE):
                cube = with_ or self._parse_wrapped_csv(self._parse_column)
                elements["cube"].extend(ensure_list(cube))

            if self._match_text_seq("TOTALS"):
                totals = True
                elements["totals"] = True  # type: ignore

            # Keep looping while grouping constructs continue to appear.
            if not (grouping_sets or rollup or cube or totals):
                break

        return self.expression(exp.Group, **elements)  # type: ignore

    def _parse_grouping_sets(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]:
        if not self._match(TokenType.GROUPING_SETS):
            return None

        return self._parse_wrapped_csv(self._parse_grouping_set)

    def _parse_grouping_set(self) -> t.Optional[exp.Expression]:
        # Either a parenthesized tuple of columns or a single column.
        if self._match(TokenType.L_PAREN):
            grouping_set = self._parse_csv(self._parse_column)
            self._match_r_paren()
            return self.expression(exp.Tuple, expressions=grouping_set)

        return self._parse_column()

    def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]:
        if not skip_having_token and not self._match(TokenType.HAVING):
            return None
        return self.expression(exp.Having, this=self._parse_conjunction())

    def _parse_qualify(self) -> t.Optional[exp.Qualify]:
        if not self._match(TokenType.QUALIFY):
            return None
        return self.expression(exp.Qualify, this=self._parse_conjunction())

    def _parse_order(
        self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False
    ) -> t.Optional[exp.Expression]:
        if not skip_order_token and not self._match(TokenType.ORDER_BY):
            return this

        return self.expression(
            exp.Order, this=this, expressions=self._parse_csv(self._parse_ordered)
        )

    def _parse_sort(self, exp_class: t.Type[E], *texts: str) -> t.Optional[E]:
        # Generic parser for ORDER-BY-like clauses introduced by `texts` keywords.
        if not self._match_text_seq(*texts):
            return None
        return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered))

    def _parse_ordered(self) -> exp.Ordered:
        """Parse a single ORDER BY term with ASC/DESC and NULLS FIRST/LAST handling."""
        this = self._parse_conjunction()
        self._match(TokenType.ASC)

        is_desc = self._match(TokenType.DESC)
        is_nulls_first = self._match_text_seq("NULLS", "FIRST")
        is_nulls_last = self._match_text_seq("NULLS", "LAST")
        desc = is_desc or False
        asc = not desc
        nulls_first = is_nulls_first or False
        explicitly_null_ordered = is_nulls_first or is_nulls_last

        # When the null ordering wasn't written explicitly, infer it from the
        # dialect's NULL_ORDERING setting.
        if (
            not explicitly_null_ordered
            and (
                (asc and self.NULL_ORDERING == "nulls_are_small")
                or (desc and self.NULL_ORDERING != "nulls_are_small")
            )
            and self.NULL_ORDERING != "nulls_are_last"
        ):
            nulls_first = True

        return self.expression(exp.Ordered, this=this, desc=desc, nulls_first=nulls_first)

    def _parse_limit(
        self, this: t.Optional[exp.Expression] = None, top: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse LIMIT/TOP or FETCH {FIRST|NEXT}; returns `this` unchanged if absent."""
        if self._match(TokenType.TOP if top else TokenType.LIMIT):
            limit_paren = self._match(TokenType.L_PAREN)
            limit_exp = self.expression(
                exp.Limit, this=this, expression=self._parse_number() if top else self._parse_term()
            )

            if limit_paren:
                self._match_r_paren()

            return limit_exp

        if self._match(TokenType.FETCH):
            direction = self._match_set((TokenType.FIRST, TokenType.NEXT))
            direction = self._prev.text if direction else "FIRST"

            count = self._parse_number()
            percent = self._match(TokenType.PERCENT)

            self._match_set((TokenType.ROW, TokenType.ROWS))

            only = self._match_text_seq("ONLY")
            with_ties = self._match_text_seq("WITH", "TIES")

            if only and with_ties:
                self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause")

            return self.expression(
                exp.Fetch,
                direction=direction,
                count=count,
                percent=percent,
                with_ties=with_ties,
            )

        return this

    def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        # A leading comma (as in `LIMIT x, y`) also introduces an offset.
        if not self._match_set((TokenType.OFFSET, TokenType.COMMA)):
            return this

        count = self._parse_number()
        self._match_set((TokenType.ROW, TokenType.ROWS))
        return self.expression(exp.Offset, this=this, expression=count)

    def _parse_locks(self) -> t.List[exp.Lock]:
        """Parse any number of row-lock clauses (FOR UPDATE / FOR SHARE / LOCK IN SHARE MODE)."""
        locks = []
        while True:
            if self._match_text_seq("FOR", "UPDATE"):
                update = True
            elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq(
                "LOCK", "IN", "SHARE", "MODE"
            ):
                update = False
            else:
                break

            expressions = None
            if self._match_text_seq("OF"):
                expressions = self._parse_csv(lambda: self._parse_table(schema=True))

            # wait: True = NOWAIT, False = SKIP LOCKED, expression = WAIT <n>.
            wait: t.Optional[bool | exp.Expression] = None
            if self._match_text_seq("NOWAIT"):
                wait = True
            elif self._match_text_seq("WAIT"):
                wait = self._parse_primary()
            elif self._match_text_seq("SKIP", "LOCKED"):
                wait = False

            locks.append(
                self.expression(exp.Lock, update=update, expressions=expressions, wait=wait)
            )

        return locks

    def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse trailing UNION/EXCEPT/INTERSECT operations chained onto `this`."""
        if not self._match_set(self.SET_OPERATIONS):
            return this

        token_type = self._prev.token_type

        if token_type == TokenType.UNION:
            expression = exp.Union
        elif token_type == TokenType.EXCEPT:
            expression = exp.Except
        else:
            expression = exp.Intersect

        return self.expression(
            expression,
            this=this,
            distinct=self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL),
            expression=self._parse_set_operations(self._parse_select(nested=True)),
        )

    def _parse_expression(self) -> t.Optional[exp.Expression]:
        return self._parse_alias(self._parse_conjunction())

    def _parse_conjunction(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_equality, self.CONJUNCTION)

    def _parse_equality(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_comparison, self.EQUALITY)

    def _parse_comparison(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_range, self.COMPARISON)

    def _parse_range(self) -> t.Optional[exp.Expression]:
        """Parse range-style predicates (BETWEEN/IN/LIKE via RANGE_PARSERS, ISNULL, IS...)."""
        this = self._parse_bitwise()
        negate = self._match(TokenType.NOT)

        if self._match_set(self.RANGE_PARSERS):
            expression = self.RANGE_PARSERS[self._prev.token_type](self, this)
            if not expression:
                return this

            this = expression
        elif self._match(TokenType.ISNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())

        # Postgres supports ISNULL and NOTNULL for conditions.
        # https://blog.andreiavram.ro/postgresql-null-composite-type/
        if self._match(TokenType.NOTNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())
            this = self.expression(exp.Not, this=this)

        if negate:
            this = self.expression(exp.Not, this=this)

        if self._match(TokenType.IS):
            this = self._parse_is(this)

        return this

    def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        index = self._index - 1
        negate = self._match(TokenType.NOT)

        if self._match_text_seq("DISTINCT", "FROM"):
            # IS [NOT] DISTINCT FROM x maps to null-safe (in)equality.
            klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ
            return self.expression(klass, this=this, expression=self._parse_expression())

        expression = self._parse_null() or self._parse_boolean()
        if not expression:
            # Not an IS predicate after all; rewind to before the IS token.
            self._retreat(index)
            return None

        this = self.expression(exp.Is, this=this, expression=expression)
        return self.expression(exp.Not, this=this) if negate else this

    def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In:
        unnest = self._parse_unnest()
        if unnest:
            this = self.expression(exp.In, this=this, unnest=unnest)
        elif self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias))

            if len(expressions) == 1 and isinstance(expressions[0], exp.Subqueryable):
                # IN (SELECT ...): store the subquery under "query", not "expressions".
                this = self.expression(exp.In, this=this, query=expressions[0])
            else:
                this = self.expression(exp.In, this=this, expressions=expressions)

            self._match_r_paren(this)
        else:
            this = self.expression(exp.In, this=this, field=self._parse_field())

        return this

    def _parse_between(self, this: exp.Expression) -> exp.Between:
        low = self._parse_bitwise()
        self._match(TokenType.AND)
        high = self._parse_bitwise()
        return self.expression(exp.Between, this=this, low=low, high=high)

    def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if not self._match(TokenType.ESCAPE):
            return this
        return self.expression(exp.Escape, this=this, expression=self._parse_string())

    def _parse_interval(self) -> t.Optional[exp.Interval]:
        """Parse an INTERVAL expression, normalizing `INTERVAL '5 day'` into value + unit."""
        if not self._match(TokenType.INTERVAL):
            return None

        this = self._parse_primary() or self._parse_term()
        unit = self._parse_function() or self._parse_var()

        # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse
        # each INTERVAL expression into this canonical form so it's easy to transpile
        if this and this.is_number:
            this = exp.Literal.string(this.name)
        elif this and this.is_string:
            parts = this.name.split()

            if len(parts) == 2:
                if unit:
                    # this is not actually a unit, it's something else
                    unit = None
                    self._retreat(self._index - 1)
                else:
                    this = exp.Literal.string(parts[0])
                    unit = self.expression(exp.Var, this=parts[1])

        return self.expression(exp.Interval, this=this, unit=unit)

    def _parse_bitwise(self) -> t.Optional[exp.Expression]:
        """Parse bitwise operators, including << / >> built from adjacent < and > tokens."""
        this = self._parse_term()

        while True:
            if self._match_set(self.BITWISE):
                this = self.expression(
                    self.BITWISE[self._prev.token_type], this=this, expression=self._parse_term()
                )
            elif self._match_pair(TokenType.LT, TokenType.LT):
                this = self.expression(
                    exp.BitwiseLeftShift, this=this, expression=self._parse_term()
                )
            elif self._match_pair(TokenType.GT, TokenType.GT):
                this = self.expression(
                    exp.BitwiseRightShift, this=this, expression=self._parse_term()
                )
            else:
                break

        return this

    def _parse_term(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_factor, self.TERM)

    def _parse_factor(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_unary, self.FACTOR)

    def _parse_unary(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.UNARY_PARSERS):
            return self.UNARY_PARSERS[self._prev.token_type](self)
        return self._parse_at_time_zone(self._parse_type())

    def _parse_type(self) -> t.Optional[exp.Expression]:
        """Parse either a cast-like `<type> <literal>` expression or fall back to a column."""
        interval = self._parse_interval()
        if interval:
            return interval

        index = self._index
        data_type = self._parse_types(check_func=True)
        this = self._parse_column()

        if data_type:
            if isinstance(this, exp.Literal):
                parser = self.TYPE_LITERAL_PARSERS.get(data_type.this)
                if parser:
                    return parser(self, this, data_type)
                return self.expression(exp.Cast, this=this, to=data_type)
            if not data_type.expressions:
                # Bare type name not followed by a literal: treat it as a column instead.
                self._retreat(index)
                return self._parse_column()
            return self._parse_column_ops(data_type)

        return this

    def _parse_type_size(self) -> t.Optional[exp.DataTypeSize]:
        # A type parameter such as a size, optionally followed by a modifier var.
        this = self._parse_type()
        if not this:
            return None

        return self.expression(
            exp.DataTypeSize, this=this, expression=self._parse_var(any_token=True)
        )
    def _parse_types(
        self, check_func: bool = False, schema: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse a (possibly nested/parameterized) data type, e.g. DECIMAL(10, 2), ARRAY<INT>.

        When `check_func` is True and the parse could equally be a function
        call, a following string literal makes the parser back off and return
        None so the name can be reparsed as a function.
        """
        index = self._index

        prefix = self._match_text_seq("SYSUDTLIB", ".")

        if not self._match_set(self.TYPE_TOKENS):
            return None

        type_token = self._prev.token_type

        if type_token == TokenType.PSEUDO_TYPE:
            return self.expression(exp.PseudoType, this=self._prev.text)

        nested = type_token in self.NESTED_TYPE_TOKENS
        is_struct = type_token == TokenType.STRUCT
        expressions = None
        maybe_func = False

        if self._match(TokenType.L_PAREN):
            if is_struct:
                expressions = self._parse_csv(self._parse_struct_types)
            elif nested:
                expressions = self._parse_csv(
                    lambda: self._parse_types(check_func=check_func, schema=schema)
                )
            else:
                expressions = self._parse_csv(self._parse_type_size)

            if not expressions or not self._match(TokenType.R_PAREN):
                # Not a valid parameter list; rewind and give up on the type parse.
                self._retreat(index)
                return None

            maybe_func = True

        if self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET):
            # Postfix [] pairs build up nested ARRAY types.
            this = exp.DataType(
                this=exp.DataType.Type.ARRAY,
                expressions=[exp.DataType.build(type_token.value, expressions=expressions)],
                nested=True,
            )

            while self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET):
                this = exp.DataType(this=exp.DataType.Type.ARRAY, expressions=[this], nested=True)

            return this

        if self._match(TokenType.L_BRACKET):
            self._retreat(index)
            return None

        values: t.Optional[t.List[t.Optional[exp.Expression]]] = None
        if nested and self._match(TokenType.LT):
            # Angle-bracket syntax for nested types, e.g. ARRAY<INT>.
            if is_struct:
                expressions = self._parse_csv(self._parse_struct_types)
            else:
                expressions = self._parse_csv(
                    lambda: self._parse_types(check_func=check_func, schema=schema)
                )

            if not self._match(TokenType.GT):
                self.raise_error("Expecting >")

            if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)):
                values = self._parse_csv(self._parse_conjunction)
                self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN))

        value: t.Optional[exp.Expression] = None
        if type_token in self.TIMESTAMPS:
            # Normalize WITH/WITHOUT TIME ZONE variants onto concrete timestamp types.
            if self._match_text_seq("WITH", "TIME", "ZONE") or type_token == TokenType.TIMESTAMPTZ:
                value = exp.DataType(this=exp.DataType.Type.TIMESTAMPTZ, expressions=expressions)
            elif (
                self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE")
                or type_token == TokenType.TIMESTAMPLTZ
            ):
                value = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions)
            elif self._match_text_seq("WITHOUT", "TIME", "ZONE"):
                if type_token == TokenType.TIME:
                    value = exp.DataType(this=exp.DataType.Type.TIME, expressions=expressions)
                else:
                    value = exp.DataType(this=exp.DataType.Type.TIMESTAMP, expressions=expressions)

            maybe_func = maybe_func and value is None

            if value is None:
                value = exp.DataType(this=exp.DataType.Type.TIMESTAMP, expressions=expressions)
        elif type_token == TokenType.INTERVAL:
            unit = self._parse_var()

            if not unit:
                value = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL)
            else:
                value = self.expression(exp.Interval, unit=unit)

        if maybe_func and check_func:
            # Peek for a trailing string literal; if absent, this was likely a
            # function call rather than a type, so undo the whole parse.
            index2 = self._index
            peek = self._parse_string()

            if not peek:
                self._retreat(index)
                return None

            self._retreat(index2)

        if value:
            return value

        return exp.DataType(
            this=exp.DataType.Type[type_token.value.upper()],
            expressions=expressions,
            nested=nested,
            values=values,
            prefix=prefix,
        )

    def _parse_struct_types(self) -> t.Optional[exp.Expression]:
        # One STRUCT field definition, with an optional colon between name and type.
        this = self._parse_type() or self._parse_id_var()
        self._match(TokenType.COLON)
        return self._parse_column_def(this)

    def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if not self._match_text_seq("AT", "TIME", "ZONE"):
            return this
        return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary())

    def _parse_column(self) -> t.Optional[exp.Expression]:
        this = self._parse_field()
        if isinstance(this, exp.Identifier):
            this = self.expression(exp.Column, this=this)
        elif not this:
            return self._parse_bracket(this)
        return self._parse_column_ops(this)

    def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Apply postfix column operators (dots, :: casts, brackets) onto `this`."""
        this = self._parse_bracket(this)

        while self._match_set(self.COLUMN_OPERATORS):
            op_token = self._prev.token_type
            op = self.COLUMN_OPERATORS.get(op_token)

            if op_token == TokenType.DCOLON:
                # `expr::type` cast operator; the right-hand side must be a type.
                field = self._parse_types()
                if not field:
                    self.raise_error("Expected type")
            elif op and self._curr:
                self._advance()
                value = self._prev.text
                field = (
                    exp.Literal.number(value)
                    if self._prev.token_type == TokenType.NUMBER
                    else exp.Literal.string(value)
                )
            else:
                field = self._parse_field(anonymous_func=True)

            if isinstance(field, exp.Func):
                # bigquery allows function calls like x.y.count(...)
                # SAFE.SUBSTR(...)
3052 # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules 3053 this = self._replace_columns_with_dots(this) 3054 3055 if op: 3056 this = op(self, this, field) 3057 elif isinstance(this, exp.Column) and not this.args.get("catalog"): 3058 this = self.expression( 3059 exp.Column, 3060 this=field, 3061 table=this.this, 3062 db=this.args.get("table"), 3063 catalog=this.args.get("db"), 3064 ) 3065 else: 3066 this = self.expression(exp.Dot, this=this, expression=field) 3067 this = self._parse_bracket(this) 3068 return this 3069 3070 def _parse_primary(self) -> t.Optional[exp.Expression]: 3071 if self._match_set(self.PRIMARY_PARSERS): 3072 token_type = self._prev.token_type 3073 primary = self.PRIMARY_PARSERS[token_type](self, self._prev) 3074 3075 if token_type == TokenType.STRING: 3076 expressions = [primary] 3077 while self._match(TokenType.STRING): 3078 expressions.append(exp.Literal.string(self._prev.text)) 3079 3080 if len(expressions) > 1: 3081 return self.expression(exp.Concat, expressions=expressions) 3082 3083 return primary 3084 3085 if self._match_pair(TokenType.DOT, TokenType.NUMBER): 3086 return exp.Literal.number(f"0.{self._prev.text}") 3087 3088 if self._match(TokenType.L_PAREN): 3089 comments = self._prev_comments 3090 query = self._parse_select() 3091 3092 if query: 3093 expressions = [query] 3094 else: 3095 expressions = self._parse_csv(self._parse_expression) 3096 3097 this = self._parse_query_modifiers(seq_get(expressions, 0)) 3098 3099 if isinstance(this, exp.Subqueryable): 3100 this = self._parse_set_operations( 3101 self._parse_subquery(this=this, parse_alias=False) 3102 ) 3103 elif len(expressions) > 1: 3104 this = self.expression(exp.Tuple, expressions=expressions) 3105 else: 3106 this = self.expression(exp.Paren, this=self._parse_set_operations(this)) 3107 3108 if this: 3109 this.add_comments(comments) 3110 3111 self._match_r_paren(expression=this) 3112 return this 3113 3114 return None 3115 3116 
    def _parse_field(
        self,
        any_token: bool = False,
        tokens: t.Optional[t.Collection[TokenType]] = None,
        anonymous_func: bool = False,
    ) -> t.Optional[exp.Expression]:
        """Parse a "field": a primary expression, a function call, or an identifier/variable.

        The three alternatives are tried in that order; ``any_token``/``tokens`` are
        forwarded to ``_parse_id_var`` and ``anonymous_func`` to ``_parse_function``.
        """
        return (
            self._parse_primary()
            or self._parse_function(anonymous=anonymous_func)
            or self._parse_id_var(any_token=any_token, tokens=tokens)
        )

    def _parse_function(
        self, functions: t.Optional[t.Dict[str, t.Callable]] = None, anonymous: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse a function call at the current token.

        Dispatch order: NO_PAREN_FUNCTION_PARSERS, then parenthesis-less NO_PAREN_FUNCTIONS,
        then (for ``name(...)`` forms) FUNCTION_PARSERS, SUBQUERY_PREDICATES wrapping a
        subquery, and finally the ``functions`` builders (defaults to self.FUNCTIONS).
        Unknown names — or ``anonymous=True`` — produce an exp.Anonymous node. A trailing
        window clause, if any, is attached via ``_parse_window``.
        """
        if not self._curr:
            return None

        token_type = self._curr.token_type

        if self._match_set(self.NO_PAREN_FUNCTION_PARSERS):
            return self.NO_PAREN_FUNCTION_PARSERS[token_type](self)

        # Not followed by "(": only parenthesis-less builtins can match here.
        if not self._next or self._next.token_type != TokenType.L_PAREN:
            if token_type in self.NO_PAREN_FUNCTIONS:
                self._advance()
                return self.expression(self.NO_PAREN_FUNCTIONS[token_type])

            return None

        if token_type not in self.FUNC_TOKENS:
            return None

        this = self._curr.text
        upper = this.upper()
        # Consume the function name and the opening "(" (we verified _next is L_PAREN).
        self._advance(2)

        parser = self.FUNCTION_PARSERS.get(upper)

        if parser and not anonymous:
            this = parser(self)
        else:
            subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type)

            if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH):
                this = self.expression(subquery_predicate, this=self._parse_select())
                self._match_r_paren()
                return this

            if functions is None:
                functions = self.FUNCTIONS

            function = functions.get(upper)

            alias = upper in self.FUNCTIONS_WITH_ALIASED_ARGS
            args = self._parse_csv(lambda: self._parse_lambda(alias=alias))

            if function and not anonymous:
                this = self.validate_expression(function(args), args)
            else:
                # Unknown (or deliberately anonymous) function: keep name + args verbatim.
                this = self.expression(exp.Anonymous, this=this, expressions=args)

        self._match_r_paren(this)
        return self._parse_window(this)

    def _parse_function_parameter(self) -> t.Optional[exp.Expression]:
        """Parse a single UDF parameter as a column definition (name plus optional type)."""
        return self._parse_column_def(self._parse_id_var())

    def _parse_user_defined_function(
        self, kind: t.Optional[TokenType] = None
    ) -> t.Optional[exp.Expression]:
        """Parse a possibly dot-qualified function name with an optional wrapped parameter
        list (DDL such as CREATE FUNCTION). ``kind`` is unused in this base implementation.
        """
        this = self._parse_id_var()

        while self._match(TokenType.DOT):
            this = self.expression(exp.Dot, this=this, expression=self._parse_id_var())

        if not self._match(TokenType.L_PAREN):
            return this

        expressions = self._parse_csv(self._parse_function_parameter)
        self._match_r_paren()
        return self.expression(
            exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True
        )

    def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier:
        """Wrap the literal that follows ``token`` in an exp.Introducer (charset-introducer
        syntax); if no literal follows, fall back to a plain Identifier built from the token.
        """
        literal = self._parse_primary()
        if literal:
            return self.expression(exp.Introducer, this=token.text, expression=literal)

        return self.expression(exp.Identifier, this=token.text)

    def _parse_session_parameter(self) -> exp.SessionParameter:
        """Parse ``name`` or ``kind.name`` into an exp.SessionParameter."""
        kind = None
        this = self._parse_id_var() or self._parse_primary()

        if this and self._match(TokenType.DOT):
            kind = this.name
            this = self._parse_var() or self._parse_primary()

        return self.expression(exp.SessionParameter, this=this, kind=kind)

    def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]:
        """Parse a lambda like ``x -> expr`` / ``(x, y) -> expr`` if a LAMBDAS operator
        follows; otherwise backtrack and parse DISTINCT arguments or a regular
        select/expression. Used for function argument lists (see _parse_function).
        """
        index = self._index

        if self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(self._parse_id_var)

            if not self._match(TokenType.R_PAREN):
                self._retreat(index)
        else:
            expressions = [self._parse_id_var()]

        if self._match_set(self.LAMBDAS):
            return self.LAMBDAS[self._prev.token_type](self, expressions)

        # Not a lambda after all — rewind and parse a plain argument.
        self._retreat(index)

        this: t.Optional[exp.Expression]

        if self._match(TokenType.DISTINCT):
            this = self.expression(
                exp.Distinct, expressions=self._parse_csv(self._parse_conjunction)
            )
        else:
            this = self._parse_select_or_expression(alias=alias)

        # NOTE(review): presumably named-argument syntax (arg = value) — the left side
        # of "=" is demoted from Column to a bare Var. Confirm against dialect callers.
        if isinstance(this, exp.EQ):
            left = this.this
            if isinstance(left, exp.Column):
                left.replace(exp.var(left.text("this")))

        return self._parse_limit(self._parse_order(self._parse_respect_or_ignore_nulls(this)))

    def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        """Parse a wrapped schema body — a list of constraints and/or column defs —
        attached to ``this``. If a nested SELECT parses at this position, the input is
        not a schema and ``this`` is returned unchanged (the probe is always rewound).
        """
        index = self._index

        if not self.errors:
            try:
                if self._parse_select(nested=True):
                    return this
            except ParseError:
                pass
            finally:
                # The probe must never consume tokens or leave errors behind.
                self.errors.clear()
                self._retreat(index)

        if not self._match(TokenType.L_PAREN):
            return this

        args = self._parse_csv(
            lambda: self._parse_constraint()
            or self._parse_column_def(self._parse_field(any_token=True))
        )

        self._match_r_paren()
        return self.expression(exp.Schema, this=this, expressions=args)

    def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse the optional type and constraints that follow a column name, producing an
        exp.ColumnDef — or returning ``this`` unchanged when neither is present.
        """
        # column defs are not really columns, they're identifiers
        if isinstance(this, exp.Column):
            this = this.this

        kind = self._parse_types(schema=True)

        if self._match_text_seq("FOR", "ORDINALITY"):
            return self.expression(exp.ColumnDef, this=this, ordinality=True)

        constraints = []
        while True:
            constraint = self._parse_column_constraint()
            if not constraint:
                break
            constraints.append(constraint)

        if not kind and not constraints:
            return this

        return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints)

    def _parse_auto_increment(
        self,
    ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint:
        """Parse auto-increment arguments: either a wrapped ``(start, increment)`` pair or
        ``START <expr> INCREMENT <expr>``. Only when both values are present does this
        build a GeneratedAsIdentityColumnConstraint.
        """
        start = None
        increment = None

        if self._match(TokenType.L_PAREN, advance=False):
            args = self._parse_wrapped_csv(self._parse_bitwise)
            start = seq_get(args, 0)
            increment = seq_get(args, 1)
        elif self._match_text_seq("START"):
            start = self._parse_bitwise()
            self._match_text_seq("INCREMENT")
            increment = self._parse_bitwise()

        if start and increment:
            return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment)

        return exp.AutoIncrementColumnConstraint()

    def _parse_compress(self) -> exp.CompressColumnConstraint:
        """Parse a COMPRESS column constraint: a wrapped value list or a single expression."""
        if self._match(TokenType.L_PAREN, advance=False):
            return self.expression(
                exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise)
            )

        return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise())

    def _parse_generated_as_identity(self) -> exp.GeneratedAsIdentityColumnConstraint:
        """Parse GENERATED {ALWAYS | BY DEFAULT [ON NULL]} AS [IDENTITY] with an optional
        wrapped option list (START WITH / INCREMENT BY / MINVALUE / MAXVALUE / [NO] CYCLE).
        Without the IDENTITY keyword, the wrapped part is a generated expression instead.
        """
        if self._match_text_seq("BY", "DEFAULT"):
            on_null = self._match_pair(TokenType.ON, TokenType.NULL)
            this = self.expression(
                exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null
            )
        else:
            self._match_text_seq("ALWAYS")
            this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True)

        self._match(TokenType.ALIAS)
        identity = self._match_text_seq("IDENTITY")

        if self._match(TokenType.L_PAREN):
            if self._match_text_seq("START", "WITH"):
                this.set("start", self._parse_bitwise())
            if self._match_text_seq("INCREMENT", "BY"):
                this.set("increment", self._parse_bitwise())
            if self._match_text_seq("MINVALUE"):
                this.set("minvalue", self._parse_bitwise())
            if self._match_text_seq("MAXVALUE"):
                this.set("maxvalue", self._parse_bitwise())

            if self._match_text_seq("CYCLE"):
                this.set("cycle", True)
            elif self._match_text_seq("NO", "CYCLE"):
                this.set("cycle", False)

            if not identity:
                this.set("expression", self._parse_bitwise())

            self._match_r_paren()

        return this

    def _parse_inline(self) -> exp.InlineLengthColumnConstraint:
        """Parse an INLINE [LENGTH] <expr> column constraint."""
        self._match_text_seq("LENGTH")
        return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise())

    def _parse_not_constraint(
        self,
    ) -> t.Optional[exp.NotNullColumnConstraint | exp.CaseSpecificColumnConstraint]:
        """Parse the tail of a NOT constraint: NULL or CASESPECIFIC; None otherwise."""
        if self._match_text_seq("NULL"):
            return self.expression(exp.NotNullColumnConstraint)
        if self._match_text_seq("CASESPECIFIC"):
            return self.expression(exp.CaseSpecificColumnConstraint, not_=True)
        return None

    def _parse_column_constraint(self) -> t.Optional[exp.Expression]:
        """Parse one column constraint, optionally named via CONSTRAINT <name>.

        Returns just the name (or None) when no CONSTRAINT_PARSERS keyword follows.
        """
        if self._match(TokenType.CONSTRAINT):
            this = self._parse_id_var()
        else:
            this = None

        if self._match_texts(self.CONSTRAINT_PARSERS):
            return self.expression(
                exp.ColumnConstraint,
                this=this,
                kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self),
            )

        return this

    def _parse_constraint(self) -> t.Optional[exp.Expression]:
        """Parse a named table constraint (CONSTRAINT <name> ...), falling back to an
        unnamed schema-level constraint when the CONSTRAINT keyword is absent.
        """
        if not self._match(TokenType.CONSTRAINT):
            return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS)

        this = self._parse_id_var()
        expressions = []

        while True:
            constraint = self._parse_unnamed_constraint() or self._parse_function()
            if not constraint:
                break
            expressions.append(constraint)

        return self.expression(exp.Constraint, this=this, expressions=expressions)

    def _parse_unnamed_constraint(
        self, constraints: t.Optional[t.Collection[str]] = None
    ) -> t.Optional[exp.Expression]:
        """Match a constraint keyword from ``constraints`` (default: CONSTRAINT_PARSERS)
        and dispatch to the corresponding parser; raises if the keyword has no parser.
        """
        if not self._match_texts(constraints or self.CONSTRAINT_PARSERS):
            return None

        constraint = self._prev.text.upper()
        if constraint not in self.CONSTRAINT_PARSERS:
            self.raise_error(f"No parser found for schema constraint {constraint}.")

        return self.CONSTRAINT_PARSERS[constraint](self)

    def _parse_unique(self) -> exp.UniqueColumnConstraint:
        """Parse UNIQUE [KEY] with an optional wrapped column list (via _parse_schema)."""
        self._match_text_seq("KEY")
        return self.expression(
            exp.UniqueColumnConstraint, this=self._parse_schema(self._parse_id_var(any_token=False))
        )

    def _parse_key_constraint_options(self) -> t.List[str]:
        """Collect trailing key options (ON <event> <action>, NOT ENFORCED, DEFERRABLE,
        INITIALLY DEFERRED, NORELY, MATCH FULL) as plain strings, until none match.
        """
        options = []
        while True:
            if not self._curr:
                break

            if self._match(TokenType.ON):
                action = None
                # The token after ON names the event (e.g. DELETE/UPDATE).
                on = self._advance_any() and self._prev.text

                if self._match_text_seq("NO", "ACTION"):
                    action = "NO ACTION"
                elif self._match_text_seq("CASCADE"):
                    action = "CASCADE"
                elif self._match_pair(TokenType.SET, TokenType.NULL):
                    action = "SET NULL"
                elif self._match_pair(TokenType.SET, TokenType.DEFAULT):
                    action = "SET DEFAULT"
                else:
                    self.raise_error("Invalid key constraint")

                options.append(f"ON {on} {action}")
            elif self._match_text_seq("NOT", "ENFORCED"):
                options.append("NOT ENFORCED")
            elif self._match_text_seq("DEFERRABLE"):
                options.append("DEFERRABLE")
            elif self._match_text_seq("INITIALLY", "DEFERRED"):
                options.append("INITIALLY DEFERRED")
            elif self._match_text_seq("NORELY"):
                options.append("NORELY")
            elif self._match_text_seq("MATCH", "FULL"):
                options.append("MATCH FULL")
            else:
                break

        return options

    def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]:
        """Parse REFERENCES <name> [(columns)] [key options].

        ``match=False`` assumes the REFERENCES keyword was already consumed by the caller.
        """
        if match and not self._match(TokenType.REFERENCES):
            return None

        expressions = None
        this = self._parse_id_var()

        if self._match(TokenType.L_PAREN, advance=False):
            expressions = self._parse_wrapped_id_vars()

        options = self._parse_key_constraint_options()
        return self.expression(exp.Reference, this=this, expressions=expressions, options=options)

    def _parse_foreign_key(self) -> exp.ForeignKey:
        """Parse FOREIGN KEY (cols) [REFERENCES ...] with any number of
        ON {DELETE | UPDATE} <action> clauses collected into keyword args.
        """
        expressions = self._parse_wrapped_id_vars()
        reference = self._parse_references()
        options = {}

        while self._match(TokenType.ON):
            if not self._match_set((TokenType.DELETE, TokenType.UPDATE)):
                self.raise_error("Expected DELETE or UPDATE")

            kind = self._prev.text.lower()

            if self._match_text_seq("NO", "ACTION"):
                action = "NO ACTION"
            elif self._match(TokenType.SET):
                self._match_set((TokenType.NULL, TokenType.DEFAULT))
                action = "SET " + self._prev.text.upper()
            else:
                # Single-token action (e.g. CASCADE/RESTRICT) — taken verbatim.
                self._advance()
                action = self._prev.text.upper()

            options[kind] = action

        return self.expression(
            exp.ForeignKey, expressions=expressions, reference=reference, **options  # type: ignore
        )

    def _parse_primary_key(
        self, wrapped_optional: bool = False, in_props: bool = False
    ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey:
        """Parse PRIMARY KEY either as a column constraint (no "(" follows and not in
        properties) or as a table-level key with a wrapped column list plus options.
        """
        desc = (
            self._match_set((TokenType.ASC, TokenType.DESC))
            and self._prev.token_type == TokenType.DESC
        )

        if not in_props and not self._match(TokenType.L_PAREN, advance=False):
            return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc)

        expressions = self._parse_wrapped_csv(self._parse_field, optional=wrapped_optional)
        options = self._parse_key_constraint_options()
        return self.expression(exp.PrimaryKey, expressions=expressions, options=options)

    def _parse_bracket(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse ``[...]`` subscripts/slices and ``{...}`` struct literals following
        ``this``; recurses at the end so chained brackets are all consumed.
        """
        if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)):
            return this

        bracket_kind = self._prev.token_type

        if self._match(TokenType.COLON):
            # Leading-colon slice, e.g. [:n].
            expressions: t.List[t.Optional[exp.Expression]] = [
                self.expression(exp.Slice, expression=self._parse_conjunction())
            ]
        else:
            expressions = self._parse_csv(lambda: self._parse_slice(self._parse_conjunction()))

        # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs
        if bracket_kind == TokenType.L_BRACE:
            this = self.expression(exp.Struct, expressions=expressions)
        elif not this or this.name.upper() == "ARRAY":
            this = self.expression(exp.Array, expressions=expressions)
        else:
            # Normalize literal indexes by the dialect's INDEX_OFFSET.
            expressions = apply_index_offset(this, expressions, -self.INDEX_OFFSET)
            this = self.expression(exp.Bracket, this=this, expressions=expressions)

        if not self._match(TokenType.R_BRACKET) and bracket_kind == TokenType.L_BRACKET:
            self.raise_error("Expected ]")
        elif not self._match(TokenType.R_BRACE) and bracket_kind == TokenType.L_BRACE:
            self.raise_error("Expected }")

        self._add_comments(this)
        return self._parse_bracket(this)

    def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Extend ``this`` into a Slice when a ":" follows; otherwise return it as-is."""
        if self._match(TokenType.COLON):
            return self.expression(exp.Slice, this=this, expression=self._parse_conjunction())
        return this

    def _parse_case(self) -> t.Optional[exp.Expression]:
        """Parse CASE [operand] WHEN ... THEN ... [ELSE ...] END, then any window clause."""
        ifs = []
        default = None

        expression = self._parse_conjunction()

        while self._match(TokenType.WHEN):
            this = self._parse_conjunction()
            self._match(TokenType.THEN)
            then = self._parse_conjunction()
            ifs.append(self.expression(exp.If, this=this, true=then))

        if self._match(TokenType.ELSE):
            default = self._parse_conjunction()

        if not self._match(TokenType.END):
            self.raise_error("Expected END after CASE", self._prev)

        return self._parse_window(
            self.expression(exp.Case, this=expression, ifs=ifs, default=default)
        )

    def _parse_if(self) -> t.Optional[exp.Expression]:
        """Parse IF in both forms: ``IF(cond, true[, false])`` and
        ``IF cond THEN true [ELSE false] END``; rewinds if no condition parses.
        """
        if self._match(TokenType.L_PAREN):
            args = self._parse_csv(self._parse_conjunction)
            this = self.validate_expression(exp.If.from_arg_list(args), args)
            self._match_r_paren()
        else:
            index = self._index - 1
            condition = self._parse_conjunction()

            if not condition:
                self._retreat(index)
                return None

            self._match(TokenType.THEN)
            true = self._parse_conjunction()
            false = self._parse_conjunction() if self._match(TokenType.ELSE) else None
            self._match(TokenType.END)
            this = self.expression(exp.If, this=condition, true=true, false=false)

        return self._parse_window(this)

    def _parse_extract(self) -> exp.Extract:
        """Parse EXTRACT(part FROM expr); a comma is accepted in place of FROM."""
        this = self._parse_function() or self._parse_var() or self._parse_type()

        if self._match(TokenType.FROM):
            return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

        if not self._match(TokenType.COMMA):
            self.raise_error("Expected FROM or comma after EXTRACT", self._prev)

        return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

    def _parse_cast(self, strict: bool) -> exp.Expression:
        """Parse CAST(expr AS type) into Cast (or TryCast when ``strict`` is False).

        Also handles ``CAST(expr, 'type string')`` (CastToStrType), CHAR with a
        CHARACTER SET clause, and temporal types with FORMAT — the latter is rewritten
        into StrToDate/StrToTime with the format mapped through format_time.
        """
        this = self._parse_conjunction()

        if not self._match(TokenType.ALIAS):
            if self._match(TokenType.COMMA):
                return self.expression(
                    exp.CastToStrType, this=this, expression=self._parse_string()
                )
            else:
                self.raise_error("Expected AS after CAST")

        to = self._parse_types()

        if not to:
            self.raise_error("Expected TYPE after CAST")
        elif to.this == exp.DataType.Type.CHAR:
            if self._match(TokenType.CHARACTER_SET):
                to = self.expression(exp.CharacterSet, this=self._parse_var_or_string())
        elif to.this in exp.DataType.TEMPORAL_TYPES and self._match(TokenType.FORMAT):
            fmt = self._parse_string()

            return self.expression(
                exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime,
                this=this,
                format=exp.Literal.string(
                    format_time(
                        fmt.this if fmt else "",
                        self.FORMAT_MAPPING or self.TIME_MAPPING,
                        self.FORMAT_TRIE or self.TIME_TRIE,
                    )
                ),
            )

        return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to)

    def _parse_concat(self) -> t.Optional[exp.Expression]:
        """Parse CONCAT arguments; when the dialect treats NULL args as empty strings
        (CONCAT_NULL_OUTPUTS_STRING), each arg is wrapped in COALESCE(arg, '').
        """
        args = self._parse_csv(self._parse_conjunction)
        if self.CONCAT_NULL_OUTPUTS_STRING:
            args = [exp.func("COALESCE", arg, exp.Literal.string("")) for arg in args]

        # Some dialects (e.g. Trino) don't allow a single-argument CONCAT call, so when
        # we find such a call we replace it with its argument.
        if len(args) == 1:
            return args[0]

        return self.expression(
            exp.Concat if self.STRICT_STRING_CONCAT else exp.SafeConcat, expressions=args
        )

    def _parse_string_agg(self) -> exp.Expression:
        """Parse STRING_AGG / GROUP_CONCAT-style arguments, including the Postgres
        ORDER BY-inside-the-arguments form and the WITHIN GROUP (ORDER BY ...) form.
        """
        expression: t.Optional[exp.Expression]

        if self._match(TokenType.DISTINCT):
            args = self._parse_csv(self._parse_conjunction)
            expression = self.expression(exp.Distinct, expressions=[seq_get(args, 0)])
        else:
            args = self._parse_csv(self._parse_conjunction)
            expression = seq_get(args, 0)

        index = self._index
        if not self._match(TokenType.R_PAREN):
            # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]])
            order = self._parse_order(this=expression)
            return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1))

        # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]).
        # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that
        # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them.
        if not self._match_text_seq("WITHIN", "GROUP"):
            self._retreat(index)
            return self.validate_expression(exp.GroupConcat.from_arg_list(args), args)

        self._match_l_paren()  # The corresponding match_r_paren will be called in parse_function (caller)
        order = self._parse_order(this=expression)
        return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1))

    def _parse_convert(self, strict: bool) -> t.Optional[exp.Expression]:
        """Parse CONVERT(expr USING charset) / CONVERT(expr, type) into Cast/TryCast;
        dialects with CONVERT_TYPE_FIRST put the type first, so the args are swapped.
        """
        to: t.Optional[exp.Expression]
        this = self._parse_bitwise()

        if self._match(TokenType.USING):
            to = self.expression(exp.CharacterSet, this=self._parse_var())
        elif self._match(TokenType.COMMA):
            to = self._parse_bitwise()
        else:
            to = None

        # Swap the argument order if needed to produce the correct AST
        if self.CONVERT_TYPE_FIRST:
            this, to = to, this

        return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to)

    def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]:
        """
        There are generally two variants of the DECODE function:

        - DECODE(bin, charset)
        - DECODE(expression, search, result [, search, result] ... [, default])

        The second variant will always be parsed into a CASE expression. Note that NULL
        needs special treatment, since we need to explicitly check for it with `IS NULL`,
        instead of relying on pattern matching.
        """
        args = self._parse_csv(self._parse_conjunction)

        if len(args) < 3:
            return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1))

        expression, *expressions = args
        if not expression:
            return None

        ifs = []
        # Walk (search, result) pairs; an unpaired trailing arg is the default.
        for search, result in zip(expressions[::2], expressions[1::2]):
            if not search or not result:
                return None

            if isinstance(search, exp.Literal):
                ifs.append(
                    exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result)
                )
            elif isinstance(search, exp.Null):
                ifs.append(
                    exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result)
                )
            else:
                # Non-literal search: must also match when both sides are NULL.
                cond = exp.or_(
                    exp.EQ(this=expression.copy(), expression=search),
                    exp.and_(
                        exp.Is(this=expression.copy(), expression=exp.Null()),
                        exp.Is(this=search.copy(), expression=exp.Null()),
                        copy=False,
                    ),
                    copy=False,
                )
                ifs.append(exp.If(this=cond, true=result))

        return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None)

    def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]:
        """Parse one [KEY] <field> {: | VALUE} <field> entry of a JSON_OBJECT call."""
        self._match_text_seq("KEY")
        key = self._parse_field()
        self._match(TokenType.COLON)
        self._match_text_seq("VALUE")
        value = self._parse_field()

        if not key and not value:
            return None
        return self.expression(exp.JSONKeyValue, this=key, expression=value)

    def _parse_json_object(self) -> exp.JSONObject:
        """Parse JSON_OBJECT(...) entries plus the optional {NULL | ABSENT} ON NULL,
        {WITH | WITHOUT} UNIQUE [KEYS], RETURNING <type>, FORMAT JSON and ENCODING clauses.
        """
        expressions = self._parse_csv(self._parse_json_key_value)

        null_handling = None
        if self._match_text_seq("NULL", "ON", "NULL"):
            null_handling = "NULL ON NULL"
        elif self._match_text_seq("ABSENT", "ON", "NULL"):
            null_handling = "ABSENT ON NULL"

        unique_keys = None
        if self._match_text_seq("WITH", "UNIQUE"):
            unique_keys = True
        elif self._match_text_seq("WITHOUT", "UNIQUE"):
            unique_keys = False

        self._match_text_seq("KEYS")

        return_type = self._match_text_seq("RETURNING") and self._parse_type()
        format_json = self._match_text_seq("FORMAT", "JSON")
        encoding = self._match_text_seq("ENCODING") and self._parse_var()

        return self.expression(
            exp.JSONObject,
            expressions=expressions,
            null_handling=null_handling,
            unique_keys=unique_keys,
            return_type=return_type,
            format_json=format_json,
            encoding=encoding,
        )

    def _parse_logarithm(self) -> exp.Func:
        """Parse LOG arguments: one arg maps to Ln or Log per LOG_DEFAULTS_TO_LN;
        with two args the order is flipped unless the dialect is LOG_BASE_FIRST.
        """
        # Default argument order is base, expression
        args = self._parse_csv(self._parse_range)

        if len(args) > 1:
            if not self.LOG_BASE_FIRST:
                args.reverse()
            return exp.Log.from_arg_list(args)

        return self.expression(
            exp.Ln if self.LOG_DEFAULTS_TO_LN else exp.Log, this=seq_get(args, 0)
        )

    def _parse_match_against(self) -> exp.MatchAgainst:
        """Parse MATCH(col, ...) AGAINST (expr [search modifier]) — MySQL full-text search."""
        expressions = self._parse_csv(self._parse_column)

        self._match_text_seq(")", "AGAINST", "(")

        this = self._parse_string()

        if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"):
            modifier = "IN NATURAL LANGUAGE MODE"
            if self._match_text_seq("WITH", "QUERY", "EXPANSION"):
                modifier = f"{modifier} WITH QUERY EXPANSION"
        elif self._match_text_seq("IN", "BOOLEAN", "MODE"):
            modifier = "IN BOOLEAN MODE"
        elif self._match_text_seq("WITH", "QUERY", "EXPANSION"):
            modifier = "WITH QUERY EXPANSION"
        else:
            modifier = None

        return self.expression(
            exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier
        )

    # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16
    def _parse_open_json(self) -> exp.OpenJSON:
        """Parse T-SQL OPENJSON(expr [, path]) with an optional WITH (column defs) clause."""
        this = self._parse_bitwise()
        path = self._match(TokenType.COMMA) and self._parse_string()

        def _parse_open_json_column_def() -> exp.OpenJSONColumnDef:
            # One WITH-clause entry: name, type, optional path, optional AS JSON.
            this = self._parse_field(any_token=True)
            kind = self._parse_types()
            path = self._parse_string()
            as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON)

            return self.expression(
                exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json
            )

        expressions = None
        if self._match_pair(TokenType.R_PAREN, TokenType.WITH):
            self._match_l_paren()
            expressions = self._parse_csv(_parse_open_json_column_def)

        return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions)

    def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition:
        """Parse POSITION(needle IN haystack) or a comma-separated argument list;
        ``haystack_first`` flips the meaning of the first two comma-style args.
        """
        args = self._parse_csv(self._parse_bitwise)

        if self._match(TokenType.IN):
            return self.expression(
                exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0)
            )

        if haystack_first:
            haystack = seq_get(args, 0)
            needle = seq_get(args, 1)
        else:
            needle = seq_get(args, 0)
            haystack = seq_get(args, 1)

        return self.expression(
            exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2)
        )

    def _parse_join_hint(self, func_name: str) -> exp.JoinHint:
        """Parse a join-hint call's table list into an exp.JoinHint named ``func_name``."""
        args = self._parse_csv(self._parse_table)
        return exp.JoinHint(this=func_name.upper(), expressions=args)

    def _parse_substring(self) -> exp.Substring:
        """Parse SUBSTRING arguments, including the Postgres FROM/FOR keyword form."""
        # Postgres supports the form: substring(string [from int] [for int])
        # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6

        args = self._parse_csv(self._parse_bitwise)

        if self._match(TokenType.FROM):
            args.append(self._parse_bitwise())
            if self._match(TokenType.FOR):
                args.append(self._parse_bitwise())

        return self.validate_expression(exp.Substring.from_arg_list(args), args)

    def _parse_trim(self) -> exp.Trim:
        """Parse TRIM([{LEADING|TRAILING|BOTH}] [chars FROM] expr [COLLATE ...])."""
        # https://www.w3resource.com/sql/character-functions/trim.php
        # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html

        position = None
        collation = None

        if self._match_texts(self.TRIM_TYPES):
            position = self._prev.text.upper()

        # First expression is the trim characters when FROM/comma follows, else the target.
        expression = self._parse_bitwise()
        if self._match_set((TokenType.FROM, TokenType.COMMA)):
            this = self._parse_bitwise()
        else:
            this = expression
            expression = None

        if self._match(TokenType.COLLATE):
            collation = self._parse_bitwise()

        return self.expression(
            exp.Trim, this=this, position=position, expression=expression, collation=collation
        )

    def _parse_window_clause(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]:
        """Parse a WINDOW clause: a comma-separated list of named windows, if present."""
        return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window)

    def _parse_named_window(self) -> t.Optional[exp.Expression]:
        """Parse one named window definition, ``name AS (...)``."""
        return self._parse_window(self._parse_id_var(), alias=True)

    def _parse_respect_or_ignore_nulls(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        """Wrap ``this`` in IgnoreNulls/RespectNulls when the matching keywords follow."""
        if self._match_text_seq("IGNORE", "NULLS"):
            return self.expression(exp.IgnoreNulls, this=this)
        if self._match_text_seq("RESPECT", "NULLS"):
            return self.expression(exp.RespectNulls, this=this)
        return this

    def _parse_window(
        self, this: t.Optional[exp.Expression], alias: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse the clauses that may trail a function call: FILTER (...), WITHIN GROUP
        (...), IGNORE/RESPECT NULLS, and an OVER window specification (partition, order,
        frame). With ``alias=True`` this parses a named window, ``name AS (...)``.
        """
        if self._match_pair(TokenType.FILTER, TokenType.L_PAREN):
            this = self.expression(exp.Filter, this=this, expression=self._parse_where())
            self._match_r_paren()

        # T-SQL allows the OVER (...) syntax after WITHIN GROUP.
        # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16
        if self._match_text_seq("WITHIN", "GROUP"):
            order = self._parse_wrapped(self._parse_order)
            this = self.expression(exp.WithinGroup, this=this, expression=order)

        # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER
        # Some dialects choose to implement and some do not.
        # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html

        # There is some code above in _parse_lambda that handles
        # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ...

        # The below changes handle
        # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ...

        # Oracle allows both formats
        # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html)
        # and Snowflake chose to do the same for familiarity
        # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes
        this = self._parse_respect_or_ignore_nulls(this)

        # bigquery select from window x AS (partition by ...)
        if alias:
            over = None
            self._match(TokenType.ALIAS)
        elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS):
            return this
        else:
            over = self._prev.text.upper()

        if not self._match(TokenType.L_PAREN):
            # OVER window_name — a reference to a named window.
            return self.expression(
                exp.Window, this=this, alias=self._parse_id_var(False), over=over
            )

        window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS)

        first = self._match(TokenType.FIRST)
        if self._match_text_seq("LAST"):
            first = False

        partition = self._parse_partition_by()
        order = self._parse_order()
        kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text

        if kind:
            self._match(TokenType.BETWEEN)
            start = self._parse_window_spec()
            self._match(TokenType.AND)
            end = self._parse_window_spec()

            spec = self.expression(
                exp.WindowSpec,
                kind=kind,
                start=start["value"],
                start_side=start["side"],
                end=end["value"],
                end_side=end["side"],
            )
        else:
            spec = None

        self._match_r_paren()

        return self.expression(
            exp.Window,
            this=this,
            partition_by=partition,
            order=order,
            spec=spec,
            alias=window_alias,
            over=over,
            first=first,
        )

    def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]:
        """Parse one frame bound: UNBOUNDED, CURRENT ROW, or an expression, plus the
        optional side keyword (from WINDOW_SIDES, e.g. PRECEDING/FOLLOWING).
        """
        self._match(TokenType.BETWEEN)

        return {
            "value": (
                (self._match_text_seq("UNBOUNDED") and "UNBOUNDED")
                or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW")
                or self._parse_bitwise()
            ),
            "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text,
        }

    def _parse_alias(
        self, this: t.Optional[exp.Expression], explicit: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse ``[AS] alias`` or ``[AS] (a, b, ...)`` after ``this``.

        With ``explicit=True`` the AS keyword is required; without an alias ``this``
        is returned unchanged.
        """
        any_token = self._match(TokenType.ALIAS)

        if explicit and not any_token:
            return this

        if self._match(TokenType.L_PAREN):
            aliases = self.expression(
                exp.Aliases,
                this=this,
                expressions=self._parse_csv(lambda: self._parse_id_var(any_token)),
            )
            self._match_r_paren(aliases)
            return aliases

        alias = self._parse_id_var(any_token)

        if alias:
            return self.expression(exp.Alias, this=this, alias=alias)

        return this

    def _parse_id_var(
        self,
        any_token: bool = True,
        tokens: t.Optional[t.Collection[TokenType]] = None,
    ) -> t.Optional[exp.Expression]:
        """Parse an identifier-like name: a quoted identifier, or — with ``any_token`` —
        almost any non-reserved token, or one from ``tokens``/ID_VAR_TOKENS.
        """
        identifier = self._parse_identifier()

        if identifier:
            return identifier

        if (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS):
            quoted = self._prev.token_type == TokenType.STRING
            return exp.Identifier(this=self._prev.text, quoted=quoted)

        return None

    def _parse_string(self) -> t.Optional[exp.Expression]:
        """Parse a string literal, falling back to a placeholder."""
        if self._match(TokenType.STRING):
            return self.PRIMARY_PARSERS[TokenType.STRING](self, self._prev)
        return self._parse_placeholder()

    def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]:
        """Parse a string literal and return it as a quoted identifier (None if absent)."""
        return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True)

    def _parse_number(self) -> t.Optional[exp.Expression]:
        """Parse a numeric literal, falling back to a placeholder."""
        if self._match(TokenType.NUMBER):
            return self.PRIMARY_PARSERS[TokenType.NUMBER](self, self._prev)
        return self._parse_placeholder()

    def _parse_identifier(self) -> t.Optional[exp.Expression]:
        """Parse a quoted identifier token, falling back to a placeholder."""
        if self._match(TokenType.IDENTIFIER):
            return self.expression(exp.Identifier, this=self._prev.text, quoted=True)
        return self._parse_placeholder()

    def _parse_var(
        self, any_token: bool = False, tokens: t.Optional[t.Collection[TokenType]] = None
    ) -> t.Optional[exp.Expression]:
        """Parse a variable/keyword-like token into exp.Var; with ``any_token`` any
        non-reserved token qualifies, otherwise only VAR or one of ``tokens``.
        """
        if (
            (any_token and self._advance_any())
            or self._match(TokenType.VAR)
            or (self._match_set(tokens) if tokens else False)
4074 ): 4075 return self.expression(exp.Var, this=self._prev.text) 4076 return self._parse_placeholder() 4077 4078 def _advance_any(self) -> t.Optional[Token]: 4079 if self._curr and self._curr.token_type not in self.RESERVED_KEYWORDS: 4080 self._advance() 4081 return self._prev 4082 return None 4083 4084 def _parse_var_or_string(self) -> t.Optional[exp.Expression]: 4085 return self._parse_var() or self._parse_string() 4086 4087 def _parse_null(self) -> t.Optional[exp.Expression]: 4088 if self._match(TokenType.NULL): 4089 return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev) 4090 return None 4091 4092 def _parse_boolean(self) -> t.Optional[exp.Expression]: 4093 if self._match(TokenType.TRUE): 4094 return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev) 4095 if self._match(TokenType.FALSE): 4096 return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev) 4097 return None 4098 4099 def _parse_star(self) -> t.Optional[exp.Expression]: 4100 if self._match(TokenType.STAR): 4101 return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev) 4102 return None 4103 4104 def _parse_parameter(self) -> exp.Parameter: 4105 wrapped = self._match(TokenType.L_BRACE) 4106 this = self._parse_var() or self._parse_identifier() or self._parse_primary() 4107 self._match(TokenType.R_BRACE) 4108 return self.expression(exp.Parameter, this=this, wrapped=wrapped) 4109 4110 def _parse_placeholder(self) -> t.Optional[exp.Expression]: 4111 if self._match_set(self.PLACEHOLDER_PARSERS): 4112 placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self) 4113 if placeholder: 4114 return placeholder 4115 self._advance(-1) 4116 return None 4117 4118 def _parse_except(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]: 4119 if not self._match(TokenType.EXCEPT): 4120 return None 4121 if self._match(TokenType.L_PAREN, advance=False): 4122 return self._parse_wrapped_csv(self._parse_column) 4123 return self._parse_csv(self._parse_column) 4124 4125 def _parse_replace(self) -> 
t.Optional[t.List[t.Optional[exp.Expression]]]: 4126 if not self._match(TokenType.REPLACE): 4127 return None 4128 if self._match(TokenType.L_PAREN, advance=False): 4129 return self._parse_wrapped_csv(self._parse_expression) 4130 return self._parse_csv(self._parse_expression) 4131 4132 def _parse_csv( 4133 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA 4134 ) -> t.List[t.Optional[exp.Expression]]: 4135 parse_result = parse_method() 4136 items = [parse_result] if parse_result is not None else [] 4137 4138 while self._match(sep): 4139 self._add_comments(parse_result) 4140 parse_result = parse_method() 4141 if parse_result is not None: 4142 items.append(parse_result) 4143 4144 return items 4145 4146 def _parse_tokens( 4147 self, parse_method: t.Callable, expressions: t.Dict 4148 ) -> t.Optional[exp.Expression]: 4149 this = parse_method() 4150 4151 while self._match_set(expressions): 4152 this = self.expression( 4153 expressions[self._prev.token_type], 4154 this=this, 4155 comments=self._prev_comments, 4156 expression=parse_method(), 4157 ) 4158 4159 return this 4160 4161 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[t.Optional[exp.Expression]]: 4162 return self._parse_wrapped_csv(self._parse_id_var, optional=optional) 4163 4164 def _parse_wrapped_csv( 4165 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False 4166 ) -> t.List[t.Optional[exp.Expression]]: 4167 return self._parse_wrapped( 4168 lambda: self._parse_csv(parse_method, sep=sep), optional=optional 4169 ) 4170 4171 def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any: 4172 wrapped = self._match(TokenType.L_PAREN) 4173 if not wrapped and not optional: 4174 self.raise_error("Expecting (") 4175 parse_result = parse_method() 4176 if wrapped: 4177 self._match_r_paren() 4178 return parse_result 4179 4180 def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]: 4181 return 
self._parse_select() or self._parse_set_operations( 4182 self._parse_expression() if alias else self._parse_conjunction() 4183 ) 4184 4185 def _parse_ddl_select(self) -> t.Optional[exp.Expression]: 4186 return self._parse_query_modifiers( 4187 self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False)) 4188 ) 4189 4190 def _parse_transaction(self) -> exp.Transaction: 4191 this = None 4192 if self._match_texts(self.TRANSACTION_KIND): 4193 this = self._prev.text 4194 4195 self._match_texts({"TRANSACTION", "WORK"}) 4196 4197 modes = [] 4198 while True: 4199 mode = [] 4200 while self._match(TokenType.VAR): 4201 mode.append(self._prev.text) 4202 4203 if mode: 4204 modes.append(" ".join(mode)) 4205 if not self._match(TokenType.COMMA): 4206 break 4207 4208 return self.expression(exp.Transaction, this=this, modes=modes) 4209 4210 def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback: 4211 chain = None 4212 savepoint = None 4213 is_rollback = self._prev.token_type == TokenType.ROLLBACK 4214 4215 self._match_texts({"TRANSACTION", "WORK"}) 4216 4217 if self._match_text_seq("TO"): 4218 self._match_text_seq("SAVEPOINT") 4219 savepoint = self._parse_id_var() 4220 4221 if self._match(TokenType.AND): 4222 chain = not self._match_text_seq("NO") 4223 self._match_text_seq("CHAIN") 4224 4225 if is_rollback: 4226 return self.expression(exp.Rollback, savepoint=savepoint) 4227 4228 return self.expression(exp.Commit, chain=chain) 4229 4230 def _parse_add_column(self) -> t.Optional[exp.Expression]: 4231 if not self._match_text_seq("ADD"): 4232 return None 4233 4234 self._match(TokenType.COLUMN) 4235 exists_column = self._parse_exists(not_=True) 4236 expression = self._parse_column_def(self._parse_field(any_token=True)) 4237 4238 if expression: 4239 expression.set("exists", exists_column) 4240 4241 # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns 4242 if self._match_texts(("FIRST", "AFTER")): 4243 position 
= self._prev.text 4244 column_position = self.expression( 4245 exp.ColumnPosition, this=self._parse_column(), position=position 4246 ) 4247 expression.set("position", column_position) 4248 4249 return expression 4250 4251 def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]: 4252 drop = self._match(TokenType.DROP) and self._parse_drop() 4253 if drop and not isinstance(drop, exp.Command): 4254 drop.set("kind", drop.args.get("kind", "COLUMN")) 4255 return drop 4256 4257 # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html 4258 def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition: 4259 return self.expression( 4260 exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists 4261 ) 4262 4263 def _parse_add_constraint(self) -> exp.AddConstraint: 4264 this = None 4265 kind = self._prev.token_type 4266 4267 if kind == TokenType.CONSTRAINT: 4268 this = self._parse_id_var() 4269 4270 if self._match_text_seq("CHECK"): 4271 expression = self._parse_wrapped(self._parse_conjunction) 4272 enforced = self._match_text_seq("ENFORCED") 4273 4274 return self.expression( 4275 exp.AddConstraint, this=this, expression=expression, enforced=enforced 4276 ) 4277 4278 if kind == TokenType.FOREIGN_KEY or self._match(TokenType.FOREIGN_KEY): 4279 expression = self._parse_foreign_key() 4280 elif kind == TokenType.PRIMARY_KEY or self._match(TokenType.PRIMARY_KEY): 4281 expression = self._parse_primary_key() 4282 else: 4283 expression = None 4284 4285 return self.expression(exp.AddConstraint, this=this, expression=expression) 4286 4287 def _parse_alter_table_add(self) -> t.List[t.Optional[exp.Expression]]: 4288 index = self._index - 1 4289 4290 if self._match_set(self.ADD_CONSTRAINT_TOKENS): 4291 return self._parse_csv(self._parse_add_constraint) 4292 4293 self._retreat(index) 4294 return self._parse_csv(self._parse_add_column) 4295 4296 def _parse_alter_table_alter(self) -> exp.AlterColumn: 4297 
self._match(TokenType.COLUMN) 4298 column = self._parse_field(any_token=True) 4299 4300 if self._match_pair(TokenType.DROP, TokenType.DEFAULT): 4301 return self.expression(exp.AlterColumn, this=column, drop=True) 4302 if self._match_pair(TokenType.SET, TokenType.DEFAULT): 4303 return self.expression(exp.AlterColumn, this=column, default=self._parse_conjunction()) 4304 4305 self._match_text_seq("SET", "DATA") 4306 return self.expression( 4307 exp.AlterColumn, 4308 this=column, 4309 dtype=self._match_text_seq("TYPE") and self._parse_types(), 4310 collate=self._match(TokenType.COLLATE) and self._parse_term(), 4311 using=self._match(TokenType.USING) and self._parse_conjunction(), 4312 ) 4313 4314 def _parse_alter_table_drop(self) -> t.List[t.Optional[exp.Expression]]: 4315 index = self._index - 1 4316 4317 partition_exists = self._parse_exists() 4318 if self._match(TokenType.PARTITION, advance=False): 4319 return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists)) 4320 4321 self._retreat(index) 4322 return self._parse_csv(self._parse_drop_column) 4323 4324 def _parse_alter_table_rename(self) -> exp.RenameTable: 4325 self._match_text_seq("TO") 4326 return self.expression(exp.RenameTable, this=self._parse_table(schema=True)) 4327 4328 def _parse_alter(self) -> exp.AlterTable | exp.Command: 4329 start = self._prev 4330 4331 if not self._match(TokenType.TABLE): 4332 return self._parse_as_command(start) 4333 4334 exists = self._parse_exists() 4335 this = self._parse_table(schema=True) 4336 4337 if self._next: 4338 self._advance() 4339 parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None 4340 4341 if parser: 4342 actions = ensure_list(parser(self)) 4343 4344 if not self._curr: 4345 return self.expression( 4346 exp.AlterTable, 4347 this=this, 4348 exists=exists, 4349 actions=actions, 4350 ) 4351 return self._parse_as_command(start) 4352 4353 def _parse_merge(self) -> exp.Merge: 4354 self._match(TokenType.INTO) 4355 target = 
self._parse_table() 4356 4357 self._match(TokenType.USING) 4358 using = self._parse_table() 4359 4360 self._match(TokenType.ON) 4361 on = self._parse_conjunction() 4362 4363 whens = [] 4364 while self._match(TokenType.WHEN): 4365 matched = not self._match(TokenType.NOT) 4366 self._match_text_seq("MATCHED") 4367 source = ( 4368 False 4369 if self._match_text_seq("BY", "TARGET") 4370 else self._match_text_seq("BY", "SOURCE") 4371 ) 4372 condition = self._parse_conjunction() if self._match(TokenType.AND) else None 4373 4374 self._match(TokenType.THEN) 4375 4376 if self._match(TokenType.INSERT): 4377 _this = self._parse_star() 4378 if _this: 4379 then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=_this) 4380 else: 4381 then = self.expression( 4382 exp.Insert, 4383 this=self._parse_value(), 4384 expression=self._match(TokenType.VALUES) and self._parse_value(), 4385 ) 4386 elif self._match(TokenType.UPDATE): 4387 expressions = self._parse_star() 4388 if expressions: 4389 then = self.expression(exp.Update, expressions=expressions) 4390 else: 4391 then = self.expression( 4392 exp.Update, 4393 expressions=self._match(TokenType.SET) 4394 and self._parse_csv(self._parse_equality), 4395 ) 4396 elif self._match(TokenType.DELETE): 4397 then = self.expression(exp.Var, this=self._prev.text) 4398 else: 4399 then = None 4400 4401 whens.append( 4402 self.expression( 4403 exp.When, 4404 matched=matched, 4405 source=source, 4406 condition=condition, 4407 then=then, 4408 ) 4409 ) 4410 4411 return self.expression( 4412 exp.Merge, 4413 this=target, 4414 using=using, 4415 on=on, 4416 expressions=whens, 4417 ) 4418 4419 def _parse_show(self) -> t.Optional[exp.Expression]: 4420 parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE) 4421 if parser: 4422 return parser(self) 4423 self._advance() 4424 return self.expression(exp.Show, this=self._prev.text.upper()) 4425 4426 def _parse_set_item_assignment( 4427 self, kind: t.Optional[str] = None 4428 ) -> 
t.Optional[exp.Expression]: 4429 index = self._index 4430 4431 if kind in {"GLOBAL", "SESSION"} and self._match_text_seq("TRANSACTION"): 4432 return self._parse_set_transaction(global_=kind == "GLOBAL") 4433 4434 left = self._parse_primary() or self._parse_id_var() 4435 4436 if not self._match_texts(("=", "TO")): 4437 self._retreat(index) 4438 return None 4439 4440 right = self._parse_statement() or self._parse_id_var() 4441 this = self.expression(exp.EQ, this=left, expression=right) 4442 4443 return self.expression(exp.SetItem, this=this, kind=kind) 4444 4445 def _parse_set_transaction(self, global_: bool = False) -> exp.Expression: 4446 self._match_text_seq("TRANSACTION") 4447 characteristics = self._parse_csv( 4448 lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS) 4449 ) 4450 return self.expression( 4451 exp.SetItem, 4452 expressions=characteristics, 4453 kind="TRANSACTION", 4454 **{"global": global_}, # type: ignore 4455 ) 4456 4457 def _parse_set_item(self) -> t.Optional[exp.Expression]: 4458 parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE) 4459 return parser(self) if parser else self._parse_set_item_assignment(kind=None) 4460 4461 def _parse_set(self) -> exp.Set | exp.Command: 4462 index = self._index 4463 set_ = self.expression(exp.Set, expressions=self._parse_csv(self._parse_set_item)) 4464 4465 if self._curr: 4466 self._retreat(index) 4467 return self._parse_as_command(self._prev) 4468 4469 return set_ 4470 4471 def _parse_var_from_options(self, options: t.Collection[str]) -> t.Optional[exp.Var]: 4472 for option in options: 4473 if self._match_text_seq(*option.split(" ")): 4474 return exp.var(option) 4475 return None 4476 4477 def _parse_as_command(self, start: Token) -> exp.Command: 4478 while self._curr: 4479 self._advance() 4480 text = self._find_sql(start, self._prev) 4481 size = len(start.text) 4482 return exp.Command(this=text[:size], expression=text[size:]) 4483 4484 def _parse_dict_property(self, this: str) -> 
exp.DictProperty: 4485 settings = [] 4486 4487 self._match_l_paren() 4488 kind = self._parse_id_var() 4489 4490 if self._match(TokenType.L_PAREN): 4491 while True: 4492 key = self._parse_id_var() 4493 value = self._parse_primary() 4494 4495 if not key and value is None: 4496 break 4497 settings.append(self.expression(exp.DictSubProperty, this=key, value=value)) 4498 self._match(TokenType.R_PAREN) 4499 4500 self._match_r_paren() 4501 4502 return self.expression( 4503 exp.DictProperty, 4504 this=this, 4505 kind=kind.this if kind else None, 4506 settings=settings, 4507 ) 4508 4509 def _parse_dict_range(self, this: str) -> exp.DictRange: 4510 self._match_l_paren() 4511 has_min = self._match_text_seq("MIN") 4512 if has_min: 4513 min = self._parse_var() or self._parse_primary() 4514 self._match_text_seq("MAX") 4515 max = self._parse_var() or self._parse_primary() 4516 else: 4517 max = self._parse_var() or self._parse_primary() 4518 min = exp.Literal.number(0) 4519 self._match_r_paren() 4520 return self.expression(exp.DictRange, this=this, min=min, max=max) 4521 4522 def _find_parser( 4523 self, parsers: t.Dict[str, t.Callable], trie: t.Dict 4524 ) -> t.Optional[t.Callable]: 4525 if not self._curr: 4526 return None 4527 4528 index = self._index 4529 this = [] 4530 while True: 4531 # The current token might be multiple words 4532 curr = self._curr.text.upper() 4533 key = curr.split(" ") 4534 this.append(curr) 4535 self._advance() 4536 result, trie = in_trie(trie, key) 4537 if result == 0: 4538 break 4539 if result == 2: 4540 subparser = parsers[" ".join(this)] 4541 return subparser 4542 self._retreat(index) 4543 return None 4544 4545 def _match(self, token_type, advance=True, expression=None): 4546 if not self._curr: 4547 return None 4548 4549 if self._curr.token_type == token_type: 4550 if advance: 4551 self._advance() 4552 self._add_comments(expression) 4553 return True 4554 4555 return None 4556 4557 def _match_set(self, types, advance=True): 4558 if not self._curr: 
4559 return None 4560 4561 if self._curr.token_type in types: 4562 if advance: 4563 self._advance() 4564 return True 4565 4566 return None 4567 4568 def _match_pair(self, token_type_a, token_type_b, advance=True): 4569 if not self._curr or not self._next: 4570 return None 4571 4572 if self._curr.token_type == token_type_a and self._next.token_type == token_type_b: 4573 if advance: 4574 self._advance(2) 4575 return True 4576 4577 return None 4578 4579 def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 4580 if not self._match(TokenType.L_PAREN, expression=expression): 4581 self.raise_error("Expecting (") 4582 4583 def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 4584 if not self._match(TokenType.R_PAREN, expression=expression): 4585 self.raise_error("Expecting )") 4586 4587 def _match_texts(self, texts, advance=True): 4588 if self._curr and self._curr.text.upper() in texts: 4589 if advance: 4590 self._advance() 4591 return True 4592 return False 4593 4594 def _match_text_seq(self, *texts, advance=True): 4595 index = self._index 4596 for text in texts: 4597 if self._curr and self._curr.text.upper() == text: 4598 self._advance() 4599 else: 4600 self._retreat(index) 4601 return False 4602 4603 if not advance: 4604 self._retreat(index) 4605 4606 return True 4607 4608 @t.overload 4609 def _replace_columns_with_dots(self, this: exp.Expression) -> exp.Expression: 4610 ... 4611 4612 @t.overload 4613 def _replace_columns_with_dots( 4614 self, this: t.Optional[exp.Expression] 4615 ) -> t.Optional[exp.Expression]: 4616 ... 
4617 4618 def _replace_columns_with_dots(self, this): 4619 if isinstance(this, exp.Dot): 4620 exp.replace_children(this, self._replace_columns_with_dots) 4621 elif isinstance(this, exp.Column): 4622 exp.replace_children(this, self._replace_columns_with_dots) 4623 table = this.args.get("table") 4624 this = ( 4625 self.expression(exp.Dot, this=table, expression=this.this) 4626 if table 4627 else self.expression(exp.Var, this=this.name) 4628 ) 4629 elif isinstance(this, exp.Identifier): 4630 this = self.expression(exp.Var, this=this.name) 4631 4632 return this 4633 4634 def _replace_lambda( 4635 self, node: t.Optional[exp.Expression], lambda_variables: t.Set[str] 4636 ) -> t.Optional[exp.Expression]: 4637 if not node: 4638 return node 4639 4640 for column in node.find_all(exp.Column): 4641 if column.parts[0].name in lambda_variables: 4642 dot_or_id = column.to_dot() if column.table else column.this 4643 parent = column.parent 4644 4645 while isinstance(parent, exp.Dot): 4646 if not isinstance(parent.parent, exp.Dot): 4647 parent.replace(dot_or_id) 4648 break 4649 parent = parent.parent 4650 else: 4651 if column is node: 4652 node = dot_or_id 4653 else: 4654 column.replace(dot_or_id) 4655 return node
def parse_var_map(args: t.List) -> exp.StarMap | exp.VarMap:
    """Build a map expression from an interleaved key/value argument list.

    A single ``*`` argument yields a StarMap; otherwise even-indexed
    arguments are the keys and odd-indexed arguments the values (an
    odd-length list raises IndexError when the last value is read).
    """
    if len(args) == 1 and args[0].is_star:
        return exp.StarMap(this=args[0])

    keys = []
    values = []
    index = 0
    while index < len(args):
        keys.append(args[index])
        values.append(args[index + 1])
        index += 2

    return exp.VarMap(
        keys=exp.Array(expressions=keys),
        values=exp.Array(expressions=values),
    )
60class Parser(metaclass=_Parser): 61 """ 62 Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree. 63 64 Args: 65 error_level: The desired error level. 66 Default: ErrorLevel.IMMEDIATE 67 error_message_context: Determines the amount of context to capture from a 68 query string when displaying the error message (in number of characters). 69 Default: 100 70 max_errors: Maximum number of error messages to include in a raised ParseError. 71 This is only relevant if error_level is ErrorLevel.RAISE. 72 Default: 3 73 """ 74 75 FUNCTIONS: t.Dict[str, t.Callable] = { 76 **{name: f.from_arg_list for f in exp.ALL_FUNCTIONS for name in f.sql_names()}, 77 "DATE_TO_DATE_STR": lambda args: exp.Cast( 78 this=seq_get(args, 0), 79 to=exp.DataType(this=exp.DataType.Type.TEXT), 80 ), 81 "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)), 82 "LIKE": parse_like, 83 "TIME_TO_TIME_STR": lambda args: exp.Cast( 84 this=seq_get(args, 0), 85 to=exp.DataType(this=exp.DataType.Type.TEXT), 86 ), 87 "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring( 88 this=exp.Cast( 89 this=seq_get(args, 0), 90 to=exp.DataType(this=exp.DataType.Type.TEXT), 91 ), 92 start=exp.Literal.number(1), 93 length=exp.Literal.number(10), 94 ), 95 "VAR_MAP": parse_var_map, 96 } 97 98 NO_PAREN_FUNCTIONS = { 99 TokenType.CURRENT_DATE: exp.CurrentDate, 100 TokenType.CURRENT_DATETIME: exp.CurrentDate, 101 TokenType.CURRENT_TIME: exp.CurrentTime, 102 TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp, 103 TokenType.CURRENT_USER: exp.CurrentUser, 104 } 105 106 NESTED_TYPE_TOKENS = { 107 TokenType.ARRAY, 108 TokenType.MAP, 109 TokenType.NULLABLE, 110 TokenType.STRUCT, 111 } 112 113 TYPE_TOKENS = { 114 TokenType.BIT, 115 TokenType.BOOLEAN, 116 TokenType.TINYINT, 117 TokenType.UTINYINT, 118 TokenType.SMALLINT, 119 TokenType.USMALLINT, 120 TokenType.INT, 121 TokenType.UINT, 122 TokenType.BIGINT, 123 TokenType.UBIGINT, 124 TokenType.INT128, 125 TokenType.UINT128, 
126 TokenType.INT256, 127 TokenType.UINT256, 128 TokenType.FLOAT, 129 TokenType.DOUBLE, 130 TokenType.CHAR, 131 TokenType.NCHAR, 132 TokenType.VARCHAR, 133 TokenType.NVARCHAR, 134 TokenType.TEXT, 135 TokenType.MEDIUMTEXT, 136 TokenType.LONGTEXT, 137 TokenType.MEDIUMBLOB, 138 TokenType.LONGBLOB, 139 TokenType.BINARY, 140 TokenType.VARBINARY, 141 TokenType.JSON, 142 TokenType.JSONB, 143 TokenType.INTERVAL, 144 TokenType.TIME, 145 TokenType.TIMESTAMP, 146 TokenType.TIMESTAMPTZ, 147 TokenType.TIMESTAMPLTZ, 148 TokenType.DATETIME, 149 TokenType.DATETIME64, 150 TokenType.DATE, 151 TokenType.INT4RANGE, 152 TokenType.INT4MULTIRANGE, 153 TokenType.INT8RANGE, 154 TokenType.INT8MULTIRANGE, 155 TokenType.NUMRANGE, 156 TokenType.NUMMULTIRANGE, 157 TokenType.TSRANGE, 158 TokenType.TSMULTIRANGE, 159 TokenType.TSTZRANGE, 160 TokenType.TSTZMULTIRANGE, 161 TokenType.DATERANGE, 162 TokenType.DATEMULTIRANGE, 163 TokenType.DECIMAL, 164 TokenType.BIGDECIMAL, 165 TokenType.UUID, 166 TokenType.GEOGRAPHY, 167 TokenType.GEOMETRY, 168 TokenType.HLLSKETCH, 169 TokenType.HSTORE, 170 TokenType.PSEUDO_TYPE, 171 TokenType.SUPER, 172 TokenType.SERIAL, 173 TokenType.SMALLSERIAL, 174 TokenType.BIGSERIAL, 175 TokenType.XML, 176 TokenType.UNIQUEIDENTIFIER, 177 TokenType.MONEY, 178 TokenType.SMALLMONEY, 179 TokenType.ROWVERSION, 180 TokenType.IMAGE, 181 TokenType.VARIANT, 182 TokenType.OBJECT, 183 TokenType.INET, 184 *NESTED_TYPE_TOKENS, 185 } 186 187 SUBQUERY_PREDICATES = { 188 TokenType.ANY: exp.Any, 189 TokenType.ALL: exp.All, 190 TokenType.EXISTS: exp.Exists, 191 TokenType.SOME: exp.Any, 192 } 193 194 RESERVED_KEYWORDS = { 195 *Tokenizer.SINGLE_TOKENS.values(), 196 TokenType.SELECT, 197 } 198 199 DB_CREATABLES = { 200 TokenType.DATABASE, 201 TokenType.SCHEMA, 202 TokenType.TABLE, 203 TokenType.VIEW, 204 TokenType.DICTIONARY, 205 } 206 207 CREATABLES = { 208 TokenType.COLUMN, 209 TokenType.FUNCTION, 210 TokenType.INDEX, 211 TokenType.PROCEDURE, 212 *DB_CREATABLES, 213 } 214 215 # Tokens that can 
represent identifiers 216 ID_VAR_TOKENS = { 217 TokenType.VAR, 218 TokenType.ANTI, 219 TokenType.APPLY, 220 TokenType.ASC, 221 TokenType.AUTO_INCREMENT, 222 TokenType.BEGIN, 223 TokenType.CACHE, 224 TokenType.COLLATE, 225 TokenType.COMMAND, 226 TokenType.COMMENT, 227 TokenType.COMMIT, 228 TokenType.CONSTRAINT, 229 TokenType.DEFAULT, 230 TokenType.DELETE, 231 TokenType.DESC, 232 TokenType.DESCRIBE, 233 TokenType.DICTIONARY, 234 TokenType.DIV, 235 TokenType.END, 236 TokenType.EXECUTE, 237 TokenType.ESCAPE, 238 TokenType.FALSE, 239 TokenType.FIRST, 240 TokenType.FILTER, 241 TokenType.FORMAT, 242 TokenType.FULL, 243 TokenType.IF, 244 TokenType.IS, 245 TokenType.ISNULL, 246 TokenType.INTERVAL, 247 TokenType.KEEP, 248 TokenType.LEFT, 249 TokenType.LOAD, 250 TokenType.MERGE, 251 TokenType.NATURAL, 252 TokenType.NEXT, 253 TokenType.OFFSET, 254 TokenType.ORDINALITY, 255 TokenType.OVERWRITE, 256 TokenType.PARTITION, 257 TokenType.PERCENT, 258 TokenType.PIVOT, 259 TokenType.PRAGMA, 260 TokenType.RANGE, 261 TokenType.REFERENCES, 262 TokenType.RIGHT, 263 TokenType.ROW, 264 TokenType.ROWS, 265 TokenType.SEMI, 266 TokenType.SET, 267 TokenType.SETTINGS, 268 TokenType.SHOW, 269 TokenType.TEMPORARY, 270 TokenType.TOP, 271 TokenType.TRUE, 272 TokenType.UNIQUE, 273 TokenType.UNPIVOT, 274 TokenType.VOLATILE, 275 TokenType.WINDOW, 276 *CREATABLES, 277 *SUBQUERY_PREDICATES, 278 *TYPE_TOKENS, 279 *NO_PAREN_FUNCTIONS, 280 } 281 282 INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END} 283 284 TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - { 285 TokenType.APPLY, 286 TokenType.ASOF, 287 TokenType.FULL, 288 TokenType.LEFT, 289 TokenType.LOCK, 290 TokenType.NATURAL, 291 TokenType.OFFSET, 292 TokenType.RIGHT, 293 TokenType.WINDOW, 294 } 295 296 COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS} 297 298 UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET} 299 300 TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"} 301 302 FUNC_TOKENS = { 303 TokenType.COMMAND, 304 TokenType.CURRENT_DATE, 305 
TokenType.CURRENT_DATETIME, 306 TokenType.CURRENT_TIMESTAMP, 307 TokenType.CURRENT_TIME, 308 TokenType.CURRENT_USER, 309 TokenType.FILTER, 310 TokenType.FIRST, 311 TokenType.FORMAT, 312 TokenType.GLOB, 313 TokenType.IDENTIFIER, 314 TokenType.INDEX, 315 TokenType.ISNULL, 316 TokenType.ILIKE, 317 TokenType.LIKE, 318 TokenType.MERGE, 319 TokenType.OFFSET, 320 TokenType.PRIMARY_KEY, 321 TokenType.RANGE, 322 TokenType.REPLACE, 323 TokenType.ROW, 324 TokenType.UNNEST, 325 TokenType.VAR, 326 TokenType.LEFT, 327 TokenType.RIGHT, 328 TokenType.DATE, 329 TokenType.DATETIME, 330 TokenType.TABLE, 331 TokenType.TIMESTAMP, 332 TokenType.TIMESTAMPTZ, 333 TokenType.WINDOW, 334 *TYPE_TOKENS, 335 *SUBQUERY_PREDICATES, 336 } 337 338 CONJUNCTION = { 339 TokenType.AND: exp.And, 340 TokenType.OR: exp.Or, 341 } 342 343 EQUALITY = { 344 TokenType.EQ: exp.EQ, 345 TokenType.NEQ: exp.NEQ, 346 TokenType.NULLSAFE_EQ: exp.NullSafeEQ, 347 } 348 349 COMPARISON = { 350 TokenType.GT: exp.GT, 351 TokenType.GTE: exp.GTE, 352 TokenType.LT: exp.LT, 353 TokenType.LTE: exp.LTE, 354 } 355 356 BITWISE = { 357 TokenType.AMP: exp.BitwiseAnd, 358 TokenType.CARET: exp.BitwiseXor, 359 TokenType.PIPE: exp.BitwiseOr, 360 TokenType.DPIPE: exp.DPipe, 361 } 362 363 TERM = { 364 TokenType.DASH: exp.Sub, 365 TokenType.PLUS: exp.Add, 366 TokenType.MOD: exp.Mod, 367 TokenType.COLLATE: exp.Collate, 368 } 369 370 FACTOR = { 371 TokenType.DIV: exp.IntDiv, 372 TokenType.LR_ARROW: exp.Distance, 373 TokenType.SLASH: exp.Div, 374 TokenType.STAR: exp.Mul, 375 } 376 377 TIMESTAMPS = { 378 TokenType.TIME, 379 TokenType.TIMESTAMP, 380 TokenType.TIMESTAMPTZ, 381 TokenType.TIMESTAMPLTZ, 382 } 383 384 SET_OPERATIONS = { 385 TokenType.UNION, 386 TokenType.INTERSECT, 387 TokenType.EXCEPT, 388 } 389 390 JOIN_METHODS = { 391 TokenType.NATURAL, 392 TokenType.ASOF, 393 } 394 395 JOIN_SIDES = { 396 TokenType.LEFT, 397 TokenType.RIGHT, 398 TokenType.FULL, 399 } 400 401 JOIN_KINDS = { 402 TokenType.INNER, 403 TokenType.OUTER, 404 
TokenType.CROSS, 405 TokenType.SEMI, 406 TokenType.ANTI, 407 } 408 409 JOIN_HINTS: t.Set[str] = set() 410 411 LAMBDAS = { 412 TokenType.ARROW: lambda self, expressions: self.expression( 413 exp.Lambda, 414 this=self._replace_lambda( 415 self._parse_conjunction(), 416 {node.name for node in expressions}, 417 ), 418 expressions=expressions, 419 ), 420 TokenType.FARROW: lambda self, expressions: self.expression( 421 exp.Kwarg, 422 this=exp.var(expressions[0].name), 423 expression=self._parse_conjunction(), 424 ), 425 } 426 427 COLUMN_OPERATORS = { 428 TokenType.DOT: None, 429 TokenType.DCOLON: lambda self, this, to: self.expression( 430 exp.Cast if self.STRICT_CAST else exp.TryCast, 431 this=this, 432 to=to, 433 ), 434 TokenType.ARROW: lambda self, this, path: self.expression( 435 exp.JSONExtract, 436 this=this, 437 expression=path, 438 ), 439 TokenType.DARROW: lambda self, this, path: self.expression( 440 exp.JSONExtractScalar, 441 this=this, 442 expression=path, 443 ), 444 TokenType.HASH_ARROW: lambda self, this, path: self.expression( 445 exp.JSONBExtract, 446 this=this, 447 expression=path, 448 ), 449 TokenType.DHASH_ARROW: lambda self, this, path: self.expression( 450 exp.JSONBExtractScalar, 451 this=this, 452 expression=path, 453 ), 454 TokenType.PLACEHOLDER: lambda self, this, key: self.expression( 455 exp.JSONBContains, 456 this=this, 457 expression=key, 458 ), 459 } 460 461 EXPRESSION_PARSERS = { 462 exp.Cluster: lambda self: self._parse_sort(exp.Cluster, "CLUSTER", "BY"), 463 exp.Column: lambda self: self._parse_column(), 464 exp.Condition: lambda self: self._parse_conjunction(), 465 exp.DataType: lambda self: self._parse_types(), 466 exp.Expression: lambda self: self._parse_statement(), 467 exp.From: lambda self: self._parse_from(), 468 exp.Group: lambda self: self._parse_group(), 469 exp.Having: lambda self: self._parse_having(), 470 exp.Identifier: lambda self: self._parse_id_var(), 471 exp.Join: lambda self: self._parse_join(), 472 exp.Lambda: lambda 
self: self._parse_lambda(),
        exp.Lateral: lambda self: self._parse_lateral(),
        exp.Limit: lambda self: self._parse_limit(),
        exp.Offset: lambda self: self._parse_offset(),
        exp.Order: lambda self: self._parse_order(),
        exp.Ordered: lambda self: self._parse_ordered(),
        exp.Properties: lambda self: self._parse_properties(),
        exp.Qualify: lambda self: self._parse_qualify(),
        exp.Returning: lambda self: self._parse_returning(),
        exp.Sort: lambda self: self._parse_sort(exp.Sort, "SORT", "BY"),
        exp.Table: lambda self: self._parse_table_parts(),
        exp.TableAlias: lambda self: self._parse_table_alias(),
        exp.Where: lambda self: self._parse_where(),
        exp.Window: lambda self: self._parse_named_window(),
        exp.With: lambda self: self._parse_with(),
        "JOIN_TYPE": lambda self: self._parse_join_parts(),
    }

    # Maps the leading token of a SQL statement to the method that parses the
    # whole statement. Dispatched from _parse_statement via _match_set.
    STATEMENT_PARSERS = {
        TokenType.ALTER: lambda self: self._parse_alter(),
        TokenType.BEGIN: lambda self: self._parse_transaction(),
        TokenType.CACHE: lambda self: self._parse_cache(),
        TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(),
        TokenType.COMMENT: lambda self: self._parse_comment(),
        TokenType.CREATE: lambda self: self._parse_create(),
        TokenType.DELETE: lambda self: self._parse_delete(),
        TokenType.DESC: lambda self: self._parse_describe(),
        TokenType.DESCRIBE: lambda self: self._parse_describe(),
        TokenType.DROP: lambda self: self._parse_drop(),
        TokenType.END: lambda self: self._parse_commit_or_rollback(),
        # A statement starting with FROM is parsed as "SELECT * FROM ..."
        TokenType.FROM: lambda self: exp.select("*").from_(
            t.cast(exp.From, self._parse_from(skip_from_token=True))
        ),
        TokenType.INSERT: lambda self: self._parse_insert(),
        TokenType.LOAD: lambda self: self._parse_load(),
        TokenType.MERGE: lambda self: self._parse_merge(),
        TokenType.PIVOT: lambda self: self._parse_simplified_pivot(),
        TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()),
        TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(),
        TokenType.SET: lambda self: self._parse_set(),
        TokenType.UNCACHE: lambda self: self._parse_uncache(),
        TokenType.UPDATE: lambda self: self._parse_update(),
        TokenType.USE: lambda self: self.expression(
            exp.Use,
            # `kind` is only set when an explicit object kind follows USE.
            kind=self._match_texts(("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA"))
            and exp.var(self._prev.text),
            this=self._parse_table(schema=False),
        ),
    }

    # Prefix (unary) operator tokens and the parsers for their operands.
    UNARY_PARSERS = {
        TokenType.PLUS: lambda self: self._parse_unary(),  # Unary + is handled as a no-op
        TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()),
        TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()),
        TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()),
    }

    # Parsers for "primary" (literal / atomic) expressions. Unlike most other
    # tables, each callable also receives the matched token.
    PRIMARY_PARSERS = {
        TokenType.STRING: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=True
        ),
        TokenType.NUMBER: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=False
        ),
        TokenType.STAR: lambda self, _: self.expression(
            exp.Star,
            **{"except": self._parse_except(), "replace": self._parse_replace()},
        ),
        TokenType.NULL: lambda self, _: self.expression(exp.Null),
        TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True),
        TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False),
        TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text),
        TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text),
        TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text),
        TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token),
        TokenType.NATIONAL_STRING: lambda self, token: self.expression(
            exp.National, this=token.text
        ),
        TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text),
        TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(),
    }

    # Parsers for bind-parameter / placeholder syntaxes (?, @param, :name, ...).
    PLACEHOLDER_PARSERS = {
        TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder),
        TokenType.PARAMETER: lambda self: self._parse_parameter(),
        # ":<number|var>" placeholders; returns None when the colon is not
        # followed by a number/var, so the caller can treat it differently.
        TokenType.COLON: lambda self: self.expression(exp.Placeholder, this=self._prev.text)
        if self._match_set((TokenType.NUMBER, TokenType.VAR))
        else None,
    }

    # Parsers for range/predicate operators (BETWEEN, IN, LIKE, IS, ...); each
    # receives the already-parsed left-hand side.
    RANGE_PARSERS = {
        TokenType.BETWEEN: lambda self, this: self._parse_between(this),
        TokenType.GLOB: binary_range_parser(exp.Glob),
        TokenType.ILIKE: binary_range_parser(exp.ILike),
        TokenType.IN: lambda self, this: self._parse_in(this),
        TokenType.IRLIKE: binary_range_parser(exp.RegexpILike),
        TokenType.IS: lambda self, this: self._parse_is(this),
        TokenType.LIKE: binary_range_parser(exp.Like),
        TokenType.OVERLAPS: binary_range_parser(exp.Overlaps),
        TokenType.RLIKE: binary_range_parser(exp.RegexpLike),
        TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo),
    }

    # Maps DDL property keywords (e.g. in CREATE statements) to their parsers.
    # Parsers registered here are dispatched by _parse_property /
    # _parse_property_before after the keyword text is matched (upper-cased).
    PROPERTY_PARSERS: t.Dict[str, t.Callable] = {
        "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty),
        "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty),
        "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(),
        "CHARACTER SET": lambda self: self._parse_character_set(),
        "CHECKSUM": lambda self: self._parse_checksum(),
        "CLUSTER": lambda self: self._parse_cluster(),
        "COLLATE": lambda self: self._parse_property_assignment(exp.CollateProperty),
        "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty),
        "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs),
        "DEFINER": lambda self: self._parse_definer(),
        # DETERMINISTIC is normalized to the IMMUTABLE stability level.
        "DETERMINISTIC": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "DISTKEY": lambda self: self._parse_distkey(),
        "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty),
        "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty),
        "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty),
        "EXTERNAL": lambda self: self.expression(exp.ExternalProperty),
        "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs),
        "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "FREESPACE": lambda self: self._parse_freespace(),
        "IMMUTABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs),
        "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty),
        "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"),
        "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"),
        "LIKE": lambda self: self._parse_create_like(),
        "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty),
        "LOCK": lambda self: self._parse_locking(),
        "LOCKING": lambda self: self._parse_locking(),
        "LOG": lambda self, **kwargs: self._parse_log(**kwargs),
        "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty),
        "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs),
        "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True),
        "NO": lambda self: self._parse_no_property(),
        "ON": lambda self: self._parse_on_property(),
        "ORDER BY": lambda self: self._parse_order(skip_order_token=True),
        "PARTITION BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED_BY": lambda self: self._parse_partitioned_by(),
        "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True),
        "RANGE": lambda self: self._parse_dict_range(this="RANGE"),
        "RETURNS": lambda self: self._parse_returns(),
        "ROW": lambda self: self._parse_row(),
        "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty),
        "SET": lambda self: self.expression(exp.SetProperty, multi=False),
        "SETTINGS": lambda self: self.expression(
            exp.SettingsProperty, expressions=self._parse_csv(self._parse_set_item)
        ),
        "SORTKEY": lambda self: self._parse_sortkey(),
        "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"),
        "STABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("STABLE")
        ),
        "STORED": lambda self: self._parse_stored(),
        "TBLPROPERTIES": lambda self: self._parse_wrapped_csv(self._parse_property),
        "TEMP": lambda self: self.expression(exp.TemporaryProperty),
        "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty),
        "TO": lambda self: self._parse_to_table(),
        "TRANSIENT": lambda self: self.expression(exp.TransientProperty),
        "TTL": lambda self: self._parse_ttl(),
        "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "VOLATILE": lambda self: self._parse_volatile_property(),
        "WITH": lambda self: self._parse_with_property(),
    }

    # Maps column-constraint keywords to their parsers.
    CONSTRAINT_PARSERS = {
        "AUTOINCREMENT": lambda self: self._parse_auto_increment(),
        "AUTO_INCREMENT": lambda self: self._parse_auto_increment(),
        "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False),
        "CHARACTER SET": lambda self: self.expression(
            exp.CharacterSetColumnConstraint, this=self._parse_var_or_string()
        ),
        "CHECK": lambda self: self.expression(
            exp.CheckColumnConstraint, this=self._parse_wrapped(self._parse_conjunction)
        ),
        "COLLATE": lambda self: self.expression(
            exp.CollateColumnConstraint, this=self._parse_var()
        ),
        "COMMENT": lambda self: self.expression(
            exp.CommentColumnConstraint, this=self._parse_string()
        ),
        "COMPRESS": lambda self: self._parse_compress(),
        "DEFAULT": lambda self: self.expression(
            exp.DefaultColumnConstraint, this=self._parse_bitwise()
        ),
        "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()),
        "FOREIGN KEY": lambda self: self._parse_foreign_key(),
        "FORMAT": lambda self: self.expression(
            exp.DateFormatColumnConstraint, this=self._parse_var_or_string()
        ),
        "GENERATED": lambda self: self._parse_generated_as_identity(),
        "IDENTITY": lambda self: self._parse_auto_increment(),
        "INLINE": lambda self: self._parse_inline(),
        "LIKE": lambda self: self._parse_create_like(),
        "NOT": lambda self: self._parse_not_constraint(),
        "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True),
        # Only "ON UPDATE <fn>" is handled here; otherwise the match fails.
        "ON": lambda self: self._match(TokenType.UPDATE)
        and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function()),
        "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()),
        "PRIMARY KEY": lambda self: self._parse_primary_key(),
        "REFERENCES": lambda self: self._parse_references(match=False),
        "TITLE": lambda self: self.expression(
            exp.TitleColumnConstraint, this=self._parse_var_or_string()
        ),
        "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]),
        "UNIQUE": lambda self: self._parse_unique(),
        "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint),
    }

    # Maps the action keyword following "ALTER TABLE <name>" to its parser.
    ALTER_PARSERS = {
        "ADD": lambda self: self._parse_alter_table_add(),
        "ALTER": lambda self: self._parse_alter_table_alter(),
        "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()),
        "DROP": lambda self: self._parse_alter_table_drop(),
        "RENAME": lambda self: self._parse_alter_table_rename(),
    }

    # Constraints that may appear in a schema definition without a name.
    SCHEMA_UNNAMED_CONSTRAINTS = {"CHECK", "FOREIGN KEY", "LIKE", "PRIMARY KEY", "UNIQUE"}
    # Function-like constructs that are written without parentheses.
    NO_PAREN_FUNCTION_PARSERS = {
        TokenType.ANY: lambda self: self.expression(exp.Any, this=self._parse_bitwise()),
        TokenType.CASE: lambda self: self._parse_case(),
        TokenType.IF: lambda self: self._parse_if(),
        TokenType.NEXT_VALUE_FOR: lambda self: self.expression(
            exp.NextValueFor,
            this=self._parse_column(),
            order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order),
        ),
    }

    # Functions whose arguments may carry aliases (e.g. STRUCT(x AS a)).
    FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"}

    # Functions whose argument lists need dedicated (non-CSV) parsing.
    FUNCTION_PARSERS: t.Dict[str, t.Callable] = {
        "CAST": lambda self: self._parse_cast(self.STRICT_CAST),
        "CONCAT": lambda self: self._parse_concat(),
        "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST),
        "DECODE": lambda self: self._parse_decode(),
        "EXTRACT": lambda self: self._parse_extract(),
        "JSON_OBJECT": lambda self: self._parse_json_object(),
        "LOG": lambda self: self._parse_logarithm(),
        "MATCH": lambda self: self._parse_match_against(),
        "OPENJSON": lambda self: self._parse_open_json(),
        "POSITION": lambda self: self._parse_position(),
        "SAFE_CAST": lambda self: self._parse_cast(False),
        "STRING_AGG": lambda self: self._parse_string_agg(),
        "SUBSTRING": lambda self: self._parse_substring(),
        "TRIM": lambda self: self._parse_trim(),
        "TRY_CAST": lambda self: self._parse_cast(False),
        "TRY_CONVERT": lambda self: self._parse_convert(False),
    }

    # Query-modifier clauses, keyed by the arg name they populate on the query
    # expression. The `iter(..., None)` calls collect repeated clauses (joins,
    # laterals) until the sub-parser returns None.
    QUERY_MODIFIER_PARSERS = {
        "joins": lambda self: list(iter(self._parse_join, None)),
        "laterals": lambda self: list(iter(self._parse_lateral, None)),
        "match": lambda self: self._parse_match_recognize(),
        "where": lambda self: self._parse_where(),
        "group": lambda self: self._parse_group(),
        "having": lambda self: self._parse_having(),
        "qualify": lambda self: self._parse_qualify(),
        "windows": lambda self: self._parse_window_clause(),
        "order": lambda self: self._parse_order(),
        "limit": lambda self: self._parse_limit(),
        "offset": lambda self: self._parse_offset(),
        "locks": lambda self: self._parse_locks(),
        "sample": lambda self: self._parse_table_sample(as_modifier=True),
    }

    # Parsers for SET statement items, keyed by the item's leading keyword.
    SET_PARSERS = {
        "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"),
        "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"),
        "SESSION": lambda self: self._parse_set_item_assignment("SESSION"),
        "TRANSACTION": lambda self: self._parse_set_transaction(),
    }

    # Empty by default; dialects populate these (SHOW_PARSERS also feeds the
    # SHOW_TRIE built by the _Parser metaclass).
    SHOW_PARSERS: t.Dict[str, t.Callable] = {}

    TYPE_LITERAL_PARSERS: t.Dict[exp.DataType.Type, t.Callable] = {}

    # Expression types that accept query modifiers (WHERE, ORDER BY, ...).
    MODIFIABLES = (exp.Subquery, exp.Subqueryable, exp.Table)

    # Tokens that may start the SELECT part of a DDL statement (CTAS etc.).
    DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN}

    # Tokens that may precede VOLATILE in a CREATE statement; used by
    # _parse_volatile_property to disambiguate the keyword.
    PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE}

    TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"}
    TRANSACTION_CHARACTERISTICS = {
        "ISOLATION LEVEL REPEATABLE READ",
        "ISOLATION LEVEL READ COMMITTED",
        "ISOLATION LEVEL READ UNCOMMITTED",
        "ISOLATION LEVEL SERIALIZABLE",
        "READ WRITE",
        "READ ONLY",
    }

    # Conflict-resolution keywords allowed after "INSERT OR".
    INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"}

    # Keywords accepted inside a CLONE clause's AT/BEFORE specification.
    CLONE_KINDS = {"TIMESTAMP", "OFFSET", "STATEMENT"}

    # ROWS is excluded so that "window AS (... ROWS ...)" is not mistaken
    # for an alias.
    WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS}
    WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER}
    WINDOW_SIDES = {"FOLLOWING", "PRECEDING"}

    ADD_CONSTRAINT_TOKENS = {TokenType.CONSTRAINT, TokenType.PRIMARY_KEY, TokenType.FOREIGN_KEY}

    # Dialect configuration flags; subclasses override these.
    STRICT_CAST = True

    CONCAT_NULL_OUTPUTS_STRING = False  # A NULL arg in CONCAT yields NULL by default

    CONVERT_TYPE_FIRST = False

    PREFIXED_PIVOT_COLUMNS = False
    IDENTIFY_PIVOT_STRINGS = False

    LOG_BASE_FIRST = True
    LOG_DEFAULTS_TO_LN = False

    __slots__ = (
        "error_level",
        "error_message_context",
        "max_errors",
        "sql",
        "errors",
        "_tokens",
        "_index",
        "_curr",
        "_next",
        "_prev",
        "_prev_comments",
    )

    # Autofilled (set externally, e.g. by the dialect machinery; SHOW_TRIE and
    # SET_TRIE are rebuilt by the _Parser metaclass from SHOW/SET_PARSERS).
    INDEX_OFFSET: int = 0
    UNNEST_COLUMN_ONLY: bool = False
    ALIAS_POST_TABLESAMPLE: bool = False
    STRICT_STRING_CONCAT = False
    NULL_ORDERING: str = "nulls_are_small"
    SHOW_TRIE: t.Dict = {}
    SET_TRIE: t.Dict = {}
    FORMAT_MAPPING: t.Dict[str, str] = {}
    FORMAT_TRIE: t.Dict = {}
    TIME_MAPPING: t.Dict[str, str] = {}
    TIME_TRIE: t.Dict = {}

    def __init__(
        self,
        error_level: t.Optional[ErrorLevel] = None,
        error_message_context: int = 100,
        max_errors: int = 3,
    ):
        self.error_level = error_level or ErrorLevel.IMMEDIATE
        self.error_message_context = error_message_context
        self.max_errors = max_errors
        self.reset()

    def reset(self):
        """Clears all parsing state so the instance can be reused."""
        self.sql = ""
        self.errors = []
        self._tokens = []
        self._index = 0
        self._curr = None
        self._next = None
        self._prev = None
        self._prev_comments = None

    def parse(
        self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens and returns a list of syntax trees, one tree
        per parsed SQL statement.

        Args:
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The list of the produced syntax trees.
        """
        return self._parse(
            parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
        )

    def parse_into(
        self,
        expression_types: exp.IntoType,
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens into a given Expression type. If a collection of Expression
        types is given instead, this method will try to parse the token list into each one
        of them, stopping at the first for which the parsing succeeds.

        Args:
            expression_types: The expression type(s) to try and parse the token list into.
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The target Expression.
        """
        errors = []
        for expression_type in ensure_list(expression_types):
            parser = self.EXPRESSION_PARSERS.get(expression_type)
            if not parser:
                raise TypeError(f"No parser registered for {expression_type}")

            try:
                return self._parse(parser, raw_tokens, sql)
            except ParseError as e:
                # Tag the error with the type we attempted so the final,
                # aggregated error is easier to interpret.
                e.errors[0]["into_expression"] = expression_type
                errors.append(e)

        raise ParseError(
            f"Failed to parse '{sql or raw_tokens}' into {expression_types}",
            errors=merge_errors(errors),
        ) from errors[-1]

    def _parse(
        self,
        parse_method: t.Callable[[Parser], t.Optional[exp.Expression]],
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        """Splits tokens on semicolons and applies `parse_method` to each chunk."""
        self.reset()
        self.sql = sql or ""

        total = len(raw_tokens)
        chunks: t.List[t.List[Token]] = [[]]

        for i, token in enumerate(raw_tokens):
            if token.token_type == TokenType.SEMICOLON:
                # A trailing semicolon doesn't open a new (empty) chunk.
                if i < total - 1:
                    chunks.append([])
            else:
                chunks[-1].append(token)

        expressions = []

        for tokens in chunks:
            self._index = -1
            self._tokens = tokens
            self._advance()

            expressions.append(parse_method(self))

            # Leftover tokens mean the chunk wasn't fully consumed.
            if self._index < len(self._tokens):
                self.raise_error("Invalid expression / Unexpected token")

            self.check_errors()

        return expressions

    def check_errors(self) -> None:
        """Logs or raises any found errors, depending on the chosen error level setting."""
        if self.error_level == ErrorLevel.WARN:
            for error in self.errors:
                logger.error(str(error))
        elif self.error_level == ErrorLevel.RAISE and self.errors:
            raise ParseError(
                concat_messages(self.errors, self.max_errors),
                errors=merge_errors(self.errors),
            )

    def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
        """
        Appends an error in the list of recorded errors or raises it, depending on the chosen
        error level setting.
        """
        token = token or self._curr or self._prev or Token.string("")
        start = token.start
        end = token.end + 1
        start_context = self.sql[max(start - self.error_message_context, 0) : start]
        highlight = self.sql[start:end]
        end_context = self.sql[end : end + self.error_message_context]

        error = ParseError.new(
            f"{message}. Line {token.line}, Col: {token.col}.\n"
            f"  {start_context}\033[4m{highlight}\033[0m{end_context}",
            description=message,
            line=token.line,
            col=token.col,
            start_context=start_context,
            highlight=highlight,
            end_context=end_context,
        )

        if self.error_level == ErrorLevel.IMMEDIATE:
            raise error

        self.errors.append(error)

    def expression(
        self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs
    ) -> E:
        """
        Creates a new, validated Expression.

        Args:
            exp_class: The expression class to instantiate.
            comments: An optional list of comments to attach to the expression.
            kwargs: The arguments to set for the expression along with their respective values.

        Returns:
            The target expression.
        """
        instance = exp_class(**kwargs)
        # When no explicit comments are given, attach (and consume) the
        # comments of the previously advanced-over token, if any.
        instance.add_comments(comments) if comments else self._add_comments(instance)
        return self.validate_expression(instance)

    def _add_comments(self, expression: t.Optional[exp.Expression]) -> None:
        """Moves the pending token comments onto `expression` and clears them."""
        if expression and self._prev_comments:
            expression.add_comments(self._prev_comments)
            self._prev_comments = None

    def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E:
        """
        Validates an Expression, making sure that all its mandatory arguments are set.

        Args:
            expression: The expression to validate.
            args: An optional list of items that was used to instantiate the expression, if it's a Func.

        Returns:
            The validated expression.
        """
        if self.error_level != ErrorLevel.IGNORE:
            for error_message in expression.error_messages(args):
                self.raise_error(error_message)

        return expression

    def _find_sql(self, start: Token, end: Token) -> str:
        """Returns the slice of the original SQL spanned by the two tokens."""
        return self.sql[start.start : end.end + 1]

    def _advance(self, times: int = 1) -> None:
        """Moves the token cursor forward, refreshing _curr/_next/_prev."""
        self._index += times
        self._curr = seq_get(self._tokens, self._index)
        self._next = seq_get(self._tokens, self._index + 1)

        if self._index > 0:
            self._prev = self._tokens[self._index - 1]
            self._prev_comments = self._prev.comments
        else:
            self._prev = None
            self._prev_comments = None

    def _retreat(self, index: int) -> None:
        """Moves the token cursor back to the given absolute index."""
        if index != self._index:
            self._advance(index - self._index)

    def _parse_command(self) -> exp.Command:
        """Falls back to an opaque Command node: keyword + the rest as a string."""
        return self.expression(exp.Command, this=self._prev.text, expression=self._parse_string())

    def _parse_comment(self, allow_exists: bool = True) -> exp.Expression:
        """Parses a COMMENT ON <kind> <target> IS <string> statement."""
        start = self._prev
        exists = self._parse_exists() if allow_exists else None

        self._match(TokenType.ON)

        kind = self._match_set(self.CREATABLES) and self._prev
        if not kind:
            # Unknown target kind: treat the whole statement as a raw command.
            return self._parse_as_command(start)

        if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=kind.token_type)
        elif kind.token_type == TokenType.TABLE:
            this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS)
        elif kind.token_type == TokenType.COLUMN:
            this = self._parse_column()
        else:
            this = self._parse_id_var()

        self._match(TokenType.IS)

        return self.expression(
            exp.Comment, this=this, kind=kind.text, expression=self._parse_string(), exists=exists
        )

    def _parse_to_table(
        self,
    ) -> exp.ToTableProperty:
        """Parses a TO <table> property (e.g. ClickHouse materialized views)."""
        table = self._parse_table_parts(schema=True)
        return self.expression(exp.ToTableProperty, this=table)

    # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl
    def _parse_ttl(self) -> exp.Expression:
        """Parses a ClickHouse MergeTree TTL clause."""

        def _parse_ttl_action() -> t.Optional[exp.Expression]:
            # Each TTL expression may carry an action (DELETE / RECOMPRESS /
            # TO DISK / TO VOLUME); with no action, the bare expression is kept.
            this = self._parse_bitwise()

            if self._match_text_seq("DELETE"):
                return self.expression(exp.MergeTreeTTLAction, this=this, delete=True)
            if self._match_text_seq("RECOMPRESS"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise()
                )
            if self._match_text_seq("TO", "DISK"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string()
                )
            if self._match_text_seq("TO", "VOLUME"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string()
                )

            return this

        expressions = self._parse_csv(_parse_ttl_action)
        where = self._parse_where()
        group = self._parse_group()

        aggregates = None
        if group and self._match(TokenType.SET):
            aggregates = self._parse_csv(self._parse_set_item)

        return self.expression(
            exp.MergeTreeTTL,
            expressions=expressions,
            where=where,
            group=group,
            aggregates=aggregates,
        )

    def _parse_statement(self) -> t.Optional[exp.Expression]:
        """Top-level entry point: parses a single SQL statement."""
        if self._curr is None:
            return None

        if self._match_set(self.STATEMENT_PARSERS):
            return self.STATEMENT_PARSERS[self._prev.token_type](self)

        if self._match_set(Tokenizer.COMMANDS):
            return self._parse_command()

        # Not a known statement keyword: try a bare expression, then a SELECT.
        expression = self._parse_expression()
        expression = self._parse_set_operations(expression) if expression else self._parse_select()
        return self._parse_query_modifiers(expression)

    def _parse_drop(self) -> exp.Drop | exp.Command:
        """Parses a DROP statement; falls back to Command on unknown kinds."""
        start = self._prev
        temporary = self._match(TokenType.TEMPORARY)
        materialized = self._match_text_seq("MATERIALIZED")

        kind = self._match_set(self.CREATABLES) and self._prev.text
        if not kind:
            return self._parse_as_command(start)

        return self.expression(
            exp.Drop,
            exists=self._parse_exists(),
            this=self._parse_table(schema=True),
            kind=kind,
            temporary=temporary,
            materialized=materialized,
            cascade=self._match_text_seq("CASCADE"),
            constraints=self._match_text_seq("CONSTRAINTS"),
            purge=self._match_text_seq("PURGE"),
        )

    def _parse_exists(self, not_: bool = False) -> t.Optional[bool]:
        """Matches IF [NOT] EXISTS; returns a truthy value iff it was present."""
        return (
            self._match(TokenType.IF)
            and (not not_ or self._match(TokenType.NOT))
            and self._match(TokenType.EXISTS)
        )

    def _parse_create(self) -> exp.Create | exp.Command:
        """Parses a CREATE statement, collecting properties from all of the
        syntactic positions they may appear in (see exp.Properties.Location)."""
        # Note: this can't be None because we've matched a statement parser
        start = self._prev
        replace = start.text.upper() == "REPLACE" or self._match_pair(
            TokenType.OR, TokenType.REPLACE
        )
        unique = self._match(TokenType.UNIQUE)

        if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False):
            # "CREATE TABLE FUNCTION": skip TABLE so FUNCTION is the creatable.
            self._advance()

        properties = None
        create_token = self._match_set(self.CREATABLES) and self._prev

        if not create_token:
            # exp.Properties.Location.POST_CREATE
            properties = self._parse_properties()
            create_token = self._match_set(self.CREATABLES) and self._prev

            if not properties or not create_token:
                return self._parse_as_command(start)

        exists = self._parse_exists(not_=True)
        this = None
        expression = None
        indexes = None
        no_schema_binding = None
        begin = None
        clone = None

        def extend_props(temp_props: t.Optional[exp.Properties]) -> None:
            # Accumulates properties parsed at different locations into one node.
            nonlocal properties
            if properties and temp_props:
                properties.expressions.extend(temp_props.expressions)
            elif temp_props:
                properties = temp_props

        if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=create_token.token_type)

            # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature)
            extend_props(self._parse_properties())

            self._match(TokenType.ALIAS)
            begin = self._match(TokenType.BEGIN)
            return_ = self._match_text_seq("RETURN")
            expression = self._parse_statement()

            if return_:
                expression = self.expression(exp.Return, this=expression)
        elif create_token.token_type == TokenType.INDEX:
            this = self._parse_index(index=self._parse_id_var())
        elif create_token.token_type in self.DB_CREATABLES:
            table_parts = self._parse_table_parts(schema=True)

            # exp.Properties.Location.POST_NAME
            self._match(TokenType.COMMA)
            extend_props(self._parse_properties(before=True))

            this = self._parse_schema(this=table_parts)

            # exp.Properties.Location.POST_SCHEMA and POST_WITH
            extend_props(self._parse_properties())

            self._match(TokenType.ALIAS)
            if not self._match_set(self.DDL_SELECT_TOKENS, advance=False):
                # exp.Properties.Location.POST_ALIAS
                extend_props(self._parse_properties())

            expression = self._parse_ddl_select()

            if create_token.token_type == TokenType.TABLE:
                indexes = []
                while True:
                    index = self._parse_index()

                    # exp.Properties.Location.POST_EXPRESSION and POST_INDEX
                    extend_props(self._parse_properties())

                    if not index:
                        break
                    else:
                        self._match(TokenType.COMMA)
                        indexes.append(index)
            elif create_token.token_type == TokenType.VIEW:
                if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"):
                    no_schema_binding = True

        if self._match_text_seq("CLONE"):
            clone = self._parse_table(schema=True)
            when = self._match_texts({"AT", "BEFORE"}) and self._prev.text.upper()
            clone_kind = (
                self._match(TokenType.L_PAREN)
                and self._match_texts(self.CLONE_KINDS)
                and self._prev.text.upper()
            )
            clone_expression = self._match(TokenType.FARROW) and self._parse_bitwise()
            self._match(TokenType.R_PAREN)
            clone = self.expression(
                exp.Clone, this=clone, when=when, kind=clone_kind, expression=clone_expression
            )

        return self.expression(
            exp.Create,
            this=this,
            kind=create_token.text,
            replace=replace,
            unique=unique,
            expression=expression,
            exists=exists,
            properties=properties,
            indexes=indexes,
            no_schema_binding=no_schema_binding,
            begin=begin,
            clone=clone,
        )

    def _parse_property_before(self) -> t.Optional[exp.Expression]:
        """Parses a property that appears before the schema (POST_NAME)."""
        # only used for teradata currently
        self._match(TokenType.COMMA)

        kwargs = {
            "no": self._match_text_seq("NO"),
            "dual": self._match_text_seq("DUAL"),
            "before": self._match_text_seq("BEFORE"),
            "default": self._match_text_seq("DEFAULT"),
            "local": (self._match_text_seq("LOCAL") and "LOCAL")
            or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"),
            "after": self._match_text_seq("AFTER"),
            "minimum": self._match_texts(("MIN", "MINIMUM")),
            "maximum": self._match_texts(("MAX", "MAXIMUM")),
        }

        if self._match_texts(self.PROPERTY_PARSERS):
            parser = self.PROPERTY_PARSERS[self._prev.text.upper()]
            try:
                # Only forward the modifier keywords that were actually present.
                return parser(self, **{k: v for k, v in kwargs.items() if v})
            except TypeError:
                # The matched parser doesn't accept these modifiers.
                self.raise_error(f"Cannot parse property '{self._prev.text}'")

        return None

    def _parse_property(self) -> t.Optional[exp.Expression]:
        """Parses a single DDL property; returns None if none matches."""
        if self._match_texts(self.PROPERTY_PARSERS):
            return self.PROPERTY_PARSERS[self._prev.text.upper()](self)

        if self._match_pair(TokenType.DEFAULT, TokenType.CHARACTER_SET):
            return self._parse_character_set(default=True)

        if self._match_text_seq("COMPOUND", "SORTKEY"):
            return self._parse_sortkey(compound=True)

        if self._match_text_seq("SQL", "SECURITY"):
            return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER"))

        # Generic "key = value" property assignments.
        assignment = self._match_pair(
            TokenType.VAR, TokenType.EQ, advance=False
        ) or self._match_pair(TokenType.STRING, TokenType.EQ, advance=False)

        if assignment:
            key = self._parse_var_or_string()
            self._match(TokenType.EQ)
            return self.expression(exp.Property, this=key, value=self._parse_column())

        return None

    def _parse_stored(self) -> exp.FileFormatProperty:
        """Parses STORED AS <format> (incl. Hive INPUTFORMAT/OUTPUTFORMAT)."""
        self._match(TokenType.ALIAS)

        input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None
        output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None

        return self.expression(
            exp.FileFormatProperty,
            this=self.expression(
                exp.InputOutputFormat, input_format=input_format, output_format=output_format
            )
            if input_format or output_format
            else self._parse_var_or_string() or self._parse_number() or self._parse_id_var(),
        )

    def _parse_property_assignment(self, exp_class: t.Type[E]) -> E:
        """Parses "<keyword> [=|AS] <value>" into the given property class."""
        self._match(TokenType.EQ)
        self._match(TokenType.ALIAS)
        return self.expression(exp_class, this=self._parse_field())

    def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]:
        """Collects consecutive properties into one exp.Properties node.

        Args:
            before: When truthy, uses the pre-schema (Teradata-style) parser.
        """
        properties = []
        while True:
            if before:
                prop = self._parse_property_before()
            else:
                prop = self._parse_property()

            if not prop:
                break
            # A single parser may return one property or a list of them.
            for p in ensure_list(prop):
                properties.append(p)

        if properties:
            return self.expression(exp.Properties, expressions=properties)

        return None

    def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty:
        """Parses a [NO] FALLBACK [PROTECTION] property (Teradata)."""
        return self.expression(
            exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION")
        )

    def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty:
        """Disambiguates VOLATILE: a table property right after CREATE/REPLACE/
        UNIQUE vs. a function stability level elsewhere."""
        if self._index >= 2:
            pre_volatile_token = self._tokens[self._index - 2]
        else:
            pre_volatile_token = None

        if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS:
            return exp.VolatileProperty()

        return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE"))

    def _parse_with_property(
        self,
    ) -> t.Optional[exp.Expression] | t.List[t.Optional[exp.Expression]]:
        """Parses the various WITH ... property forms."""
        self._match(TokenType.WITH)
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_property)

        if self._match_text_seq("JOURNAL"):
            return self._parse_withjournaltable()

        if self._match_text_seq("DATA"):
            return self._parse_withdata(no=False)
        elif self._match_text_seq("NO", "DATA"):
            return self._parse_withdata(no=True)

        if not self._next:
            return None

        return self._parse_withisolatedloading()

    # https://dev.mysql.com/doc/refman/8.0/en/create-view.html
    def _parse_definer(self) -> t.Optional[exp.DefinerProperty]:
        """Parses DEFINER = user@host (MySQL)."""
        self._match(TokenType.EQ)

        user = self._parse_id_var()
        self._match(TokenType.PARAMETER)
        host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text)

        if not user or not host:
            return None

        return exp.DefinerProperty(this=f"{user}@{host}")

    def _parse_withjournaltable(self) -> exp.WithJournalTableProperty:
        """Parses WITH JOURNAL [TABLE] [=] <table> (Teradata)."""
        self._match(TokenType.TABLE)
        self._match(TokenType.EQ)
        return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts())

    def _parse_log(self, no: bool = False) -> exp.LogProperty:
        return self.expression(exp.LogProperty, no=no)

    def _parse_journal(self, **kwargs) -> exp.JournalProperty:
        return self.expression(exp.JournalProperty, **kwargs)

    def _parse_checksum(self) -> exp.ChecksumProperty:
        """Parses CHECKSUM [=] ON|OFF|DEFAULT."""
        self._match(TokenType.EQ)

        on = None
        if self._match(TokenType.ON):
            on = True
        elif self._match_text_seq("OFF"):
            on = False

        return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT))

    def _parse_cluster(self) -> t.Optional[exp.Cluster]:
        """Parses CLUSTER BY <exprs>; backtracks if BY is missing."""
        if not self._match_text_seq("BY"):
            self._retreat(self._index - 1)
            return None

        return self.expression(exp.Cluster, expressions=self._parse_csv(self._parse_ordered))

    def _parse_freespace(self) -> exp.FreespaceProperty:
        """Parses FREESPACE [=] <number> [PERCENT] (Teradata)."""
        self._match(TokenType.EQ)
        return self.expression(
            exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT)
        )

    def _parse_mergeblockratio(
        self, no: bool = False, default: bool = False
    ) -> exp.MergeBlockRatioProperty:
        """Parses MERGEBLOCKRATIO, with or without an explicit value (Teradata)."""
        if self._match(TokenType.EQ):
            return self.expression(
                exp.MergeBlockRatioProperty,
                this=self._parse_number(),
                percent=self._match(TokenType.PERCENT),
            )

        return self.expression(exp.MergeBlockRatioProperty, no=no, default=default)

    def _parse_datablocksize(
        self,
        default: t.Optional[bool] = None,
        minimum: t.Optional[bool] = None,
        maximum: t.Optional[bool] = None,
    ) -> exp.DataBlocksizeProperty:
        """Parses DATABLOCKSIZE [=] <size> [BYTES|KBYTES|KILOBYTES] (Teradata)."""
        self._match(TokenType.EQ)
        size = self._parse_number()

        units = None
        if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")):
            units = self._prev.text

        return self.expression(
            exp.DataBlocksizeProperty,
            size=size,
            units=units,
            default=default,
            minimum=minimum,
            maximum=maximum,
        )

    def _parse_blockcompression(self) -> exp.BlockCompressionProperty:
        """Parses BLOCKCOMPRESSION [=] ALWAYS|MANUAL|NEVER|DEFAULT [AUTOTEMP ...]."""
        self._match(TokenType.EQ)
        always = self._match_text_seq("ALWAYS")
        manual = self._match_text_seq("MANUAL")
        never = self._match_text_seq("NEVER")
        default = self._match_text_seq("DEFAULT")

        autotemp = None
        if self._match_text_seq("AUTOTEMP"):
            autotemp = self._parse_schema()

        return self.expression(
            exp.BlockCompressionProperty,
            always=always,
            manual=manual,
            never=never,
            default=default,
            autotemp=autotemp,
        )

    def _parse_withisolatedloading(self) -> exp.IsolatedLoadingProperty:
        """Parses WITH [NO] [CONCURRENT] ISOLATED LOADING [FOR ALL|INSERT|NONE]."""
        no = self._match_text_seq("NO")
        concurrent = self._match_text_seq("CONCURRENT")
        self._match_text_seq("ISOLATED", "LOADING")
        for_all = self._match_text_seq("FOR", "ALL")
        for_insert = self._match_text_seq("FOR", "INSERT")
        for_none = self._match_text_seq("FOR", "NONE")
        return self.expression(
            exp.IsolatedLoadingProperty,
            no=no,
            concurrent=concurrent,
            for_all=for_all,
            for_insert=for_insert,
            for_none=for_none,
        )

    def _parse_locking(self) -> exp.LockingProperty:
        """Parses a LOCKING property: target kind/name, FOR|IN, lock type,
        and an optional OVERRIDE (Teradata)."""
        if self._match(TokenType.TABLE):
            kind = "TABLE"
        elif self._match(TokenType.VIEW):
            kind = "VIEW"
        elif self._match(TokenType.ROW):
            kind = "ROW"
        elif self._match_text_seq("DATABASE"):
            kind = "DATABASE"
        else:
            kind = None

        # ROW locking carries no object name.
        if kind in ("DATABASE", "TABLE", "VIEW"):
            this = self._parse_table_parts()
        else:
            this = None

        if self._match(TokenType.FOR):
            for_or_in = "FOR"
        elif self._match(TokenType.IN):
            for_or_in = "IN"
        else:
            for_or_in = None

        if self._match_text_seq("ACCESS"):
            lock_type = "ACCESS"
        elif self._match_texts(("EXCL", "EXCLUSIVE")):
            lock_type = "EXCLUSIVE"
        elif self._match_text_seq("SHARE"):
            lock_type = "SHARE"
        elif self._match_text_seq("READ"):
            lock_type = "READ"
        elif self._match_text_seq("WRITE"):
            lock_type = "WRITE"
        elif self._match_text_seq("CHECKSUM"):
            lock_type = "CHECKSUM"
        else:
            lock_type = None

        override = self._match_text_seq("OVERRIDE")

        return self.expression(
            exp.LockingProperty,
            this=this,
            kind=kind,
            for_or_in=for_or_in,
            lock_type=lock_type,
            override=override,
        )

    def _parse_partition_by(self) -> t.List[t.Optional[exp.Expression]]:
        """Parses PARTITION BY <exprs>; returns [] when the clause is absent."""
        if self._match(TokenType.PARTITION_BY):
            return self._parse_csv(self._parse_conjunction)
        return []

    def _parse_partitioned_by(self) -> exp.PartitionedByProperty:
        """Parses PARTITIONED BY, as either a schema or a bracketed field list."""
        self._match(TokenType.EQ)
        return self.expression(
            exp.PartitionedByProperty,
            this=self._parse_schema() or self._parse_bracket(self._parse_field()),
        )

    def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty:
        """Parses WITH [NO] DATA [AND [NO] STATISTICS] (Teradata)."""
        if self._match_text_seq("AND", "STATISTICS"):
            statistics = True
        elif self._match_text_seq("AND", "NO", "STATISTICS"):
            statistics = False
        else:
            statistics = None

        return self.expression(exp.WithDataProperty, no=no, statistics=statistics)

    def _parse_no_property(self) -> t.Optional[exp.NoPrimaryIndexProperty]:
        """Parses NO PRIMARY INDEX; returns None for any other NO ... sequence."""
        if self._match_text_seq("PRIMARY", "INDEX"):
            return exp.NoPrimaryIndexProperty()
        return None

    def _parse_on_property(self) -> t.Optional[exp.Expression]:
        """Parses ON COMMIT PRESERVE|DELETE ROWS; returns None otherwise."""
        if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"):
            return exp.OnCommitProperty()
        elif self._match_text_seq("COMMIT", "DELETE", "ROWS"):
            return exp.OnCommitProperty(delete=True)
        return None

    def _parse_distkey(self) -> exp.DistKeyProperty:
        """Parses DISTKEY(<column>) (Redshift)."""
        return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var))

    def _parse_create_like(self) -> t.Optional[exp.LikeProperty]:
        """Parses LIKE <table> [INCLUDING|EXCLUDING <option> ...]."""
        table = self._parse_table(schema=True)

        options = []
        while self._match_texts(("INCLUDING", "EXCLUDING")):
            this = self._prev.text.upper()

            id_var = self._parse_id_var()
            if not id_var:
                return None

            options.append(
self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper())) 1602 ) 1603 1604 return self.expression(exp.LikeProperty, this=table, expressions=options) 1605 1606 def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty: 1607 return self.expression( 1608 exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound 1609 ) 1610 1611 def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty: 1612 self._match(TokenType.EQ) 1613 return self.expression( 1614 exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default 1615 ) 1616 1617 def _parse_returns(self) -> exp.ReturnsProperty: 1618 value: t.Optional[exp.Expression] 1619 is_table = self._match(TokenType.TABLE) 1620 1621 if is_table: 1622 if self._match(TokenType.LT): 1623 value = self.expression( 1624 exp.Schema, 1625 this="TABLE", 1626 expressions=self._parse_csv(self._parse_struct_types), 1627 ) 1628 if not self._match(TokenType.GT): 1629 self.raise_error("Expecting >") 1630 else: 1631 value = self._parse_schema(exp.var("TABLE")) 1632 else: 1633 value = self._parse_types() 1634 1635 return self.expression(exp.ReturnsProperty, this=value, is_table=is_table) 1636 1637 def _parse_describe(self) -> exp.Describe: 1638 kind = self._match_set(self.CREATABLES) and self._prev.text 1639 this = self._parse_table() 1640 return self.expression(exp.Describe, this=this, kind=kind) 1641 1642 def _parse_insert(self) -> exp.Insert: 1643 overwrite = self._match(TokenType.OVERWRITE) 1644 local = self._match_text_seq("LOCAL") 1645 alternative = None 1646 1647 if self._match_text_seq("DIRECTORY"): 1648 this: t.Optional[exp.Expression] = self.expression( 1649 exp.Directory, 1650 this=self._parse_var_or_string(), 1651 local=local, 1652 row_format=self._parse_row_format(match_row=True), 1653 ) 1654 else: 1655 if self._match(TokenType.OR): 1656 alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text 1657 1658 self._match(TokenType.INTO) 
            self._match(TokenType.TABLE)
            this = self._parse_table(schema=True)

        return self.expression(
            exp.Insert,
            this=this,
            exists=self._parse_exists(),
            partition=self._parse_partition(),
            expression=self._parse_ddl_select(),
            conflict=self._parse_on_conflict(),
            returning=self._parse_returning(),
            overwrite=overwrite,
            alternative=alternative,
        )

    def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]:
        """Parse ON CONFLICT (postgres-style) or ON DUPLICATE KEY (mysql-style)."""
        conflict = self._match_text_seq("ON", "CONFLICT")
        duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY")

        if not conflict and not duplicate:
            return None

        nothing = None
        expressions = None
        key = None
        constraint = None

        if conflict:
            # Conflict target: either a named constraint or a key/column list.
            if self._match_text_seq("ON", "CONSTRAINT"):
                constraint = self._parse_id_var()
            else:
                key = self._parse_csv(self._parse_value)

        self._match_text_seq("DO")
        if self._match_text_seq("NOTHING"):
            nothing = True
        else:
            self._match(TokenType.UPDATE)
            expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality)

        return self.expression(
            exp.OnConflict,
            duplicate=duplicate,
            expressions=expressions,
            nothing=nothing,
            key=key,
            constraint=constraint,
        )

    def _parse_returning(self) -> t.Optional[exp.Returning]:
        """Parse a RETURNING <column list> clause, if present."""
        if not self._match(TokenType.RETURNING):
            return None

        return self.expression(exp.Returning, expressions=self._parse_csv(self._parse_column))

    def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]:
        """Parse ROW FORMAT ... where the ROW token was already consumed."""
        if not self._match(TokenType.FORMAT):
            return None
        return self._parse_row_format()

    def _parse_row_format(
        self, match_row: bool = False
    ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]:
        """Parse a Hive row format: SERDE '<name>' or DELIMITED with its options.

        Args:
            match_row: when True, require (and consume) a leading ROW FORMAT pair.
        """
        if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT):
            return None

        if self._match_text_seq("SERDE"):
            return self.expression(exp.RowFormatSerdeProperty, this=self._parse_string())

        self._match_text_seq("DELIMITED")

        kwargs = {}

        # Each DELIMITED sub-clause is optional and order-sensitive as written here.
        if self._match_text_seq("FIELDS", "TERMINATED", "BY"):
            kwargs["fields"] = self._parse_string()
        if self._match_text_seq("ESCAPED", "BY"):
            kwargs["escaped"] = self._parse_string()
        if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"):
            kwargs["collection_items"] = self._parse_string()
        if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"):
            kwargs["map_keys"] = self._parse_string()
        if self._match_text_seq("LINES", "TERMINATED", "BY"):
            kwargs["lines"] = self._parse_string()
        if self._match_text_seq("NULL", "DEFINED", "AS"):
            kwargs["null"] = self._parse_string()

        return self.expression(exp.RowFormatDelimitedProperty, **kwargs)  # type: ignore

    def _parse_load(self) -> exp.LoadData | exp.Command:
        """Parse Hive's LOAD DATA [LOCAL] INPATH ... INTO TABLE ...; anything else
        falls back to an opaque exp.Command."""
        if self._match_text_seq("DATA"):
            local = self._match_text_seq("LOCAL")
            self._match_text_seq("INPATH")
            inpath = self._parse_string()
            overwrite = self._match(TokenType.OVERWRITE)
            self._match_pair(TokenType.INTO, TokenType.TABLE)

            return self.expression(
                exp.LoadData,
                this=self._parse_table(schema=True),
                local=local,
                overwrite=overwrite,
                inpath=inpath,
                partition=self._parse_partition(),
                input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(),
                serde=self._match_text_seq("SERDE") and self._parse_string(),
            )
        return self._parse_as_command(self._prev)

    def _parse_delete(self) -> exp.Delete:
        """Parse DELETE [FROM] <table> [USING ...] [WHERE ...] [RETURNING ...]."""
        self._match(TokenType.FROM)

        return self.expression(
            exp.Delete,
            this=self._parse_table(),
            using=self._parse_csv(lambda: self._match(TokenType.USING) and self._parse_table()),
            where=self._parse_where(),
            returning=self._parse_returning(),
        )

    def _parse_update(self) -> exp.Update:
        """Parse UPDATE <table> SET ... [FROM ...] [WHERE ...] [RETURNING ...]."""
        return self.expression(
            exp.Update,
            **{  # type: ignore
                "this": self._parse_table(alias_tokens=self.UPDATE_ALIAS_TOKENS),
                "expressions": self._match(TokenType.SET) and self._parse_csv(self._parse_equality),
                "from": self._parse_from(modifiers=True),
                "where": self._parse_where(),
                "returning": self._parse_returning(),
            },
        )

    def _parse_uncache(self) -> exp.Uncache:
        """Parse UNCACHE TABLE [IF EXISTS] <table>."""
        if not self._match(TokenType.TABLE):
            self.raise_error("Expecting TABLE after UNCACHE")

        return self.expression(
            exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True)
        )

    def _parse_cache(self) -> exp.Cache:
        """Parse CACHE [LAZY] TABLE <table> [OPTIONS('k' = 'v')] [AS <select>]."""
        lazy = self._match_text_seq("LAZY")
        self._match(TokenType.TABLE)
        table = self._parse_table(schema=True)

        options = []
        if self._match_text_seq("OPTIONS"):
            # A single key = value pair inside parentheses.
            self._match_l_paren()
            k = self._parse_string()
            self._match(TokenType.EQ)
            v = self._parse_string()
            options = [k, v]
            self._match_r_paren()

        self._match(TokenType.ALIAS)
        return self.expression(
            exp.Cache,
            this=table,
            lazy=lazy,
            options=options,
            expression=self._parse_select(nested=True),
        )

    def _parse_partition(self) -> t.Optional[exp.Partition]:
        """Parse PARTITION (<expr>, ...), if present."""
        if not self._match(TokenType.PARTITION):
            return None

        return self.expression(
            exp.Partition, expressions=self._parse_wrapped_csv(self._parse_conjunction)
        )

    def _parse_value(self) -> exp.Tuple:
        """Parse one VALUES row: a parenthesized tuple or a single bare expression."""
        if self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(self._parse_conjunction)
            self._match_r_paren()
            return self.expression(exp.Tuple, expressions=expressions)

        # In presto we can have VALUES 1, 2 which results in 1 column & 2 rows.
        # Source: https://prestodb.io/docs/current/sql/values.html
        return self.expression(exp.Tuple, expressions=[self._parse_conjunction()])

    def _parse_select(
        self, nested: bool = False, table: bool = False, parse_subquery_alias: bool = True
    ) -> t.Optional[exp.Expression]:
        """Parse a SELECT-like query: WITH-prefixed statements, plain SELECT,
        parenthesized subqueries (when nested/table), and VALUES lists.

        Args:
            nested: allow a parenthesized nested select at this position.
            table: allow a bare table reference inside parentheses.
            parse_subquery_alias: whether to parse an alias on a subquery result.
        """
        cte = self._parse_with()
        if cte:
            this = self._parse_statement()

            if not this:
                # raise_error may be a no-op under lenient error levels, hence
                # the fallbacks that still return a sensible node.
                self.raise_error("Failed to parse any statement following CTE")
                return cte

            if "with" in this.arg_types:
                this.set("with", cte)
            else:
                self.raise_error(f"{this.key} does not support CTE")
                this = cte
        elif self._match(TokenType.SELECT):
            comments = self._prev_comments

            hint = self._parse_hint()
            all_ = self._match(TokenType.ALL)
            distinct = self._match(TokenType.DISTINCT)

            # e.g. BigQuery's SELECT AS STRUCT / SELECT AS VALUE.
            kind = (
                self._match(TokenType.ALIAS)
                and self._match_texts(("STRUCT", "VALUE"))
                and self._prev.text
            )

            if distinct:
                distinct = self.expression(
                    exp.Distinct,
                    on=self._parse_value() if self._match(TokenType.ON) else None,
                )

            if all_ and distinct:
                self.raise_error("Cannot specify both ALL and DISTINCT after SELECT")

            # TOP-style limits appear before the projection list.
            limit = self._parse_limit(top=True)
            expressions = self._parse_csv(self._parse_expression)

            this = self.expression(
                exp.Select,
                kind=kind,
                hint=hint,
                distinct=distinct,
                expressions=expressions,
                limit=limit,
            )
            this.comments = comments

            into = self._parse_into()
            if into:
                this.set("into", into)

            from_ = self._parse_from()
            if from_:
                this.set("from", from_)

            this = self._parse_query_modifiers(this)
        elif (table or nested) and self._match(TokenType.L_PAREN):
            if self._match(TokenType.PIVOT):
                this = self._parse_simplified_pivot()
            elif self._match(TokenType.FROM):
                # duckdb's FROM-first syntax: (FROM tbl) == SELECT * FROM tbl.
                this = exp.select("*").from_(
                    t.cast(exp.From, self._parse_from(skip_from_token=True))
                )
            else:
                this = self._parse_table() if table else self._parse_select(nested=True)
                this = self._parse_set_operations(self._parse_query_modifiers(this))

            self._match_r_paren()

            # early return so that subquery unions aren't parsed again
            # SELECT * FROM (SELECT 1) UNION ALL SELECT 1
            # Union ALL should be a property of the top select node, not the subquery
            return self._parse_subquery(this, parse_alias=parse_subquery_alias)
        elif self._match(TokenType.VALUES):
            this = self.expression(
                exp.Values,
                expressions=self._parse_csv(self._parse_value),
                alias=self._parse_table_alias(),
            )
        else:
            this = None

        return self._parse_set_operations(this)

    def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]:
        """Parse a WITH clause and its comma-separated CTE list."""
        if not skip_with_token and not self._match(TokenType.WITH):
            return None

        comments = self._prev_comments
        recursive = self._match(TokenType.RECURSIVE)

        expressions = []
        while True:
            expressions.append(self._parse_cte())

            if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH):
                break
            else:
                # Tolerate a redundant WITH between CTEs (e.g. ", WITH b AS ...").
                self._match(TokenType.WITH)

        return self.expression(
            exp.With, comments=comments, expressions=expressions, recursive=recursive
        )

    def _parse_cte(self) -> exp.CTE:
        """Parse a single CTE: <alias> AS (<statement>)."""
        alias = self._parse_table_alias()
        if not alias or not alias.this:
            self.raise_error("Expected CTE to have alias")

        self._match(TokenType.ALIAS)
        return self.expression(
            exp.CTE, this=self._parse_wrapped(self._parse_statement), alias=alias
        )

    def _parse_table_alias(
        self, alias_tokens: t.Optional[t.Collection[TokenType]] = None
    ) -> t.Optional[exp.TableAlias]:
        """Parse an optional table alias, possibly with a column alias list."""
        any_token = self._match(TokenType.ALIAS)
        alias = (
            self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
            or self._parse_string_as_identifier()
        )

        index = self._index
        if self._match(TokenType.L_PAREN):
            # A parenthesized column alias list; if it turns out empty, rewind
            # so the paren can be consumed by whatever follows.
            columns = self._parse_csv(self._parse_function_parameter)
            self._match_r_paren() if columns else self._retreat(index)
        else:
            columns = None

        if not alias and not columns:
            return None

        return self.expression(exp.TableAlias, this=alias, columns=columns)

    def _parse_subquery(
        self, this: t.Optional[exp.Expression], parse_alias: bool = True
    ) -> t.Optional[exp.Subquery]:
        """Wrap a parsed query in exp.Subquery, attaching pivots and an alias."""
        if not this:
            return None

        return self.expression(
            exp.Subquery,
            this=this,
            pivots=self._parse_pivots(),
            alias=self._parse_table_alias() if parse_alias else None,
        )

    def _parse_query_modifiers(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        """Attach trailing query modifiers (where/group/order/limit/...) to `this`.

        Non-modifiable nodes are returned unchanged. The modifier parsers run in
        the (dialect-defined) order of QUERY_MODIFIER_PARSERS.
        """
        if isinstance(this, self.MODIFIABLES):
            for key, parser in self.QUERY_MODIFIER_PARSERS.items():
                expression = parser(self)

                if expression:
                    this.set(key, expression)
        return this

    def _parse_hint(self) -> t.Optional[exp.Hint]:
        """Parse an optimizer hint comment /*+ ... */ after SELECT."""
        if self._match(TokenType.HINT):
            hints = self._parse_csv(self._parse_function)

            if not self._match_pair(TokenType.STAR, TokenType.SLASH):
                self.raise_error("Expected */ after HINT")

            return self.expression(exp.Hint, expressions=hints)

        return None

    def _parse_into(self) -> t.Optional[exp.Into]:
        """Parse SELECT ... INTO [TEMPORARY|UNLOGGED] [TABLE] <table>."""
        if not self._match(TokenType.INTO):
            return None

        temp = self._match(TokenType.TEMPORARY)
        unlogged = self._match_text_seq("UNLOGGED")
        self._match(TokenType.TABLE)

        return self.expression(
            exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged
        )

    def _parse_from(
        self, modifiers: bool = False, skip_from_token: bool = False
    ) -> t.Optional[exp.From]:
        """Parse a FROM clause; with modifiers=True also consume trailing
        query modifiers into the table expression (used by UPDATE ... FROM)."""
        if not skip_from_token and not self._match(TokenType.FROM):
2029 return None 2030 2031 comments = self._prev_comments 2032 this = self._parse_table() 2033 2034 return self.expression( 2035 exp.From, 2036 comments=comments, 2037 this=self._parse_query_modifiers(this) if modifiers else this, 2038 ) 2039 2040 def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]: 2041 if not self._match(TokenType.MATCH_RECOGNIZE): 2042 return None 2043 2044 self._match_l_paren() 2045 2046 partition = self._parse_partition_by() 2047 order = self._parse_order() 2048 measures = ( 2049 self._parse_csv(self._parse_expression) if self._match_text_seq("MEASURES") else None 2050 ) 2051 2052 if self._match_text_seq("ONE", "ROW", "PER", "MATCH"): 2053 rows = exp.var("ONE ROW PER MATCH") 2054 elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"): 2055 text = "ALL ROWS PER MATCH" 2056 if self._match_text_seq("SHOW", "EMPTY", "MATCHES"): 2057 text += f" SHOW EMPTY MATCHES" 2058 elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"): 2059 text += f" OMIT EMPTY MATCHES" 2060 elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"): 2061 text += f" WITH UNMATCHED ROWS" 2062 rows = exp.var(text) 2063 else: 2064 rows = None 2065 2066 if self._match_text_seq("AFTER", "MATCH", "SKIP"): 2067 text = "AFTER MATCH SKIP" 2068 if self._match_text_seq("PAST", "LAST", "ROW"): 2069 text += f" PAST LAST ROW" 2070 elif self._match_text_seq("TO", "NEXT", "ROW"): 2071 text += f" TO NEXT ROW" 2072 elif self._match_text_seq("TO", "FIRST"): 2073 text += f" TO FIRST {self._advance_any().text}" # type: ignore 2074 elif self._match_text_seq("TO", "LAST"): 2075 text += f" TO LAST {self._advance_any().text}" # type: ignore 2076 after = exp.var(text) 2077 else: 2078 after = None 2079 2080 if self._match_text_seq("PATTERN"): 2081 self._match_l_paren() 2082 2083 if not self._curr: 2084 self.raise_error("Expecting )", self._curr) 2085 2086 paren = 1 2087 start = self._curr 2088 2089 while self._curr and paren > 0: 2090 if self._curr.token_type == TokenType.L_PAREN: 2091 
paren += 1 2092 if self._curr.token_type == TokenType.R_PAREN: 2093 paren -= 1 2094 2095 end = self._prev 2096 self._advance() 2097 2098 if paren > 0: 2099 self.raise_error("Expecting )", self._curr) 2100 2101 pattern = exp.var(self._find_sql(start, end)) 2102 else: 2103 pattern = None 2104 2105 define = ( 2106 self._parse_csv( 2107 lambda: self.expression( 2108 exp.Alias, 2109 alias=self._parse_id_var(any_token=True), 2110 this=self._match(TokenType.ALIAS) and self._parse_conjunction(), 2111 ) 2112 ) 2113 if self._match_text_seq("DEFINE") 2114 else None 2115 ) 2116 2117 self._match_r_paren() 2118 2119 return self.expression( 2120 exp.MatchRecognize, 2121 partition_by=partition, 2122 order=order, 2123 measures=measures, 2124 rows=rows, 2125 after=after, 2126 pattern=pattern, 2127 define=define, 2128 alias=self._parse_table_alias(), 2129 ) 2130 2131 def _parse_lateral(self) -> t.Optional[exp.Lateral]: 2132 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY) 2133 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY) 2134 2135 if outer_apply or cross_apply: 2136 this = self._parse_select(table=True) 2137 view = None 2138 outer = not cross_apply 2139 elif self._match(TokenType.LATERAL): 2140 this = self._parse_select(table=True) 2141 view = self._match(TokenType.VIEW) 2142 outer = self._match(TokenType.OUTER) 2143 else: 2144 return None 2145 2146 if not this: 2147 this = self._parse_function() or self._parse_id_var(any_token=False) 2148 while self._match(TokenType.DOT): 2149 this = exp.Dot( 2150 this=this, 2151 expression=self._parse_function() or self._parse_id_var(any_token=False), 2152 ) 2153 2154 if view: 2155 table = self._parse_id_var(any_token=False) 2156 columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else [] 2157 table_alias: t.Optional[exp.TableAlias] = self.expression( 2158 exp.TableAlias, this=table, columns=columns 2159 ) 2160 else: 2161 table_alias = self._parse_table_alias() 2162 2163 return 
self.expression(exp.Lateral, this=this, view=view, outer=outer, alias=table_alias) 2164 2165 def _parse_join_parts( 2166 self, 2167 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 2168 return ( 2169 self._match_set(self.JOIN_METHODS) and self._prev, 2170 self._match_set(self.JOIN_SIDES) and self._prev, 2171 self._match_set(self.JOIN_KINDS) and self._prev, 2172 ) 2173 2174 def _parse_join(self, skip_join_token: bool = False) -> t.Optional[exp.Join]: 2175 if self._match(TokenType.COMMA): 2176 return self.expression(exp.Join, this=self._parse_table()) 2177 2178 index = self._index 2179 method, side, kind = self._parse_join_parts() 2180 hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None 2181 join = self._match(TokenType.JOIN) 2182 2183 if not skip_join_token and not join: 2184 self._retreat(index) 2185 kind = None 2186 method = None 2187 side = None 2188 2189 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False) 2190 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False) 2191 2192 if not skip_join_token and not join and not outer_apply and not cross_apply: 2193 return None 2194 2195 if outer_apply: 2196 side = Token(TokenType.LEFT, "LEFT") 2197 2198 kwargs: t.Dict[str, t.Any] = {"this": self._parse_table()} 2199 2200 if method: 2201 kwargs["method"] = method.text 2202 if side: 2203 kwargs["side"] = side.text 2204 if kind: 2205 kwargs["kind"] = kind.text 2206 if hint: 2207 kwargs["hint"] = hint 2208 2209 if self._match(TokenType.ON): 2210 kwargs["on"] = self._parse_conjunction() 2211 elif self._match(TokenType.USING): 2212 kwargs["using"] = self._parse_wrapped_id_vars() 2213 2214 return self.expression(exp.Join, **kwargs) 2215 2216 def _parse_index( 2217 self, 2218 index: t.Optional[exp.Expression] = None, 2219 ) -> t.Optional[exp.Index]: 2220 if index: 2221 unique = None 2222 primary = None 2223 amp = None 2224 2225 self._match(TokenType.ON) 2226 self._match(TokenType.TABLE) # hive 2227 table 
= self._parse_table_parts(schema=True) 2228 else: 2229 unique = self._match(TokenType.UNIQUE) 2230 primary = self._match_text_seq("PRIMARY") 2231 amp = self._match_text_seq("AMP") 2232 2233 if not self._match(TokenType.INDEX): 2234 return None 2235 2236 index = self._parse_id_var() 2237 table = None 2238 2239 using = self._parse_field() if self._match(TokenType.USING) else None 2240 2241 if self._match(TokenType.L_PAREN, advance=False): 2242 columns = self._parse_wrapped_csv(self._parse_ordered) 2243 else: 2244 columns = None 2245 2246 return self.expression( 2247 exp.Index, 2248 this=index, 2249 table=table, 2250 using=using, 2251 columns=columns, 2252 unique=unique, 2253 primary=primary, 2254 amp=amp, 2255 partition_by=self._parse_partition_by(), 2256 ) 2257 2258 def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]: 2259 return ( 2260 (not schema and self._parse_function()) 2261 or self._parse_id_var(any_token=False) 2262 or self._parse_string_as_identifier() 2263 or self._parse_placeholder() 2264 ) 2265 2266 def _parse_table_parts(self, schema: bool = False) -> exp.Table: 2267 catalog = None 2268 db = None 2269 table = self._parse_table_part(schema=schema) 2270 2271 while self._match(TokenType.DOT): 2272 if catalog: 2273 # This allows nesting the table in arbitrarily many dot expressions if needed 2274 table = self.expression( 2275 exp.Dot, this=table, expression=self._parse_table_part(schema=schema) 2276 ) 2277 else: 2278 catalog = db 2279 db = table 2280 table = self._parse_table_part(schema=schema) 2281 2282 if not table: 2283 self.raise_error(f"Expected table name but got {self._curr}") 2284 2285 return self.expression( 2286 exp.Table, this=table, db=db, catalog=catalog, pivots=self._parse_pivots() 2287 ) 2288 2289 def _parse_table( 2290 self, schema: bool = False, alias_tokens: t.Optional[t.Collection[TokenType]] = None 2291 ) -> t.Optional[exp.Expression]: 2292 lateral = self._parse_lateral() 2293 if lateral: 2294 return lateral 
2295 2296 unnest = self._parse_unnest() 2297 if unnest: 2298 return unnest 2299 2300 values = self._parse_derived_table_values() 2301 if values: 2302 return values 2303 2304 subquery = self._parse_select(table=True) 2305 if subquery: 2306 if not subquery.args.get("pivots"): 2307 subquery.set("pivots", self._parse_pivots()) 2308 return subquery 2309 2310 this: exp.Expression = self._parse_table_parts(schema=schema) 2311 2312 if schema: 2313 return self._parse_schema(this=this) 2314 2315 if self.ALIAS_POST_TABLESAMPLE: 2316 table_sample = self._parse_table_sample() 2317 2318 alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 2319 if alias: 2320 this.set("alias", alias) 2321 2322 if not this.args.get("pivots"): 2323 this.set("pivots", self._parse_pivots()) 2324 2325 if self._match_pair(TokenType.WITH, TokenType.L_PAREN): 2326 this.set( 2327 "hints", 2328 self._parse_csv(lambda: self._parse_function() or self._parse_var(any_token=True)), 2329 ) 2330 self._match_r_paren() 2331 2332 if not self.ALIAS_POST_TABLESAMPLE: 2333 table_sample = self._parse_table_sample() 2334 2335 if table_sample: 2336 table_sample.set("this", this) 2337 this = table_sample 2338 2339 return this 2340 2341 def _parse_unnest(self) -> t.Optional[exp.Unnest]: 2342 if not self._match(TokenType.UNNEST): 2343 return None 2344 2345 expressions = self._parse_wrapped_csv(self._parse_type) 2346 ordinality = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 2347 alias = self._parse_table_alias() 2348 2349 if alias and self.UNNEST_COLUMN_ONLY: 2350 if alias.args.get("columns"): 2351 self.raise_error("Unexpected extra column alias in unnest.") 2352 2353 alias.set("columns", [alias.this]) 2354 alias.set("this", None) 2355 2356 offset = None 2357 if self._match_pair(TokenType.WITH, TokenType.OFFSET): 2358 self._match(TokenType.ALIAS) 2359 offset = self._parse_id_var() or exp.to_identifier("offset") 2360 2361 return self.expression( 2362 exp.Unnest, 
expressions=expressions, ordinality=ordinality, alias=alias, offset=offset 2363 ) 2364 2365 def _parse_derived_table_values(self) -> t.Optional[exp.Values]: 2366 is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES) 2367 if not is_derived and not self._match(TokenType.VALUES): 2368 return None 2369 2370 expressions = self._parse_csv(self._parse_value) 2371 2372 if is_derived: 2373 self._match_r_paren() 2374 2375 return self.expression(exp.Values, expressions=expressions, alias=self._parse_table_alias()) 2376 2377 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 2378 if not self._match(TokenType.TABLE_SAMPLE) and not ( 2379 as_modifier and self._match_text_seq("USING", "SAMPLE") 2380 ): 2381 return None 2382 2383 bucket_numerator = None 2384 bucket_denominator = None 2385 bucket_field = None 2386 percent = None 2387 rows = None 2388 size = None 2389 seed = None 2390 2391 kind = ( 2392 self._prev.text if self._prev.token_type == TokenType.TABLE_SAMPLE else "USING SAMPLE" 2393 ) 2394 method = self._parse_var(tokens=(TokenType.ROW,)) 2395 2396 self._match(TokenType.L_PAREN) 2397 2398 num = self._parse_number() 2399 2400 if self._match_text_seq("BUCKET"): 2401 bucket_numerator = self._parse_number() 2402 self._match_text_seq("OUT", "OF") 2403 bucket_denominator = bucket_denominator = self._parse_number() 2404 self._match(TokenType.ON) 2405 bucket_field = self._parse_field() 2406 elif self._match_set((TokenType.PERCENT, TokenType.MOD)): 2407 percent = num 2408 elif self._match(TokenType.ROWS): 2409 rows = num 2410 else: 2411 size = num 2412 2413 self._match(TokenType.R_PAREN) 2414 2415 if self._match(TokenType.L_PAREN): 2416 method = self._parse_var() 2417 seed = self._match(TokenType.COMMA) and self._parse_number() 2418 self._match_r_paren() 2419 elif self._match_texts(("SEED", "REPEATABLE")): 2420 seed = self._parse_wrapped(self._parse_number) 2421 2422 return self.expression( 2423 exp.TableSample, 2424 
            method=method,
            bucket_numerator=bucket_numerator,
            bucket_denominator=bucket_denominator,
            bucket_field=bucket_field,
            percent=percent,
            rows=rows,
            size=size,
            seed=seed,
            kind=kind,
        )

    def _parse_pivots(self) -> t.List[t.Optional[exp.Expression]]:
        """Collect consecutive PIVOT/UNPIVOT clauses until one fails to parse."""
        return list(iter(self._parse_pivot, None))

    # https://duckdb.org/docs/sql/statements/pivot
    def _parse_simplified_pivot(self) -> exp.Pivot:
        """Parse duckdb's simplified PIVOT: table [ON ...] [USING ...] [GROUP BY ...]."""
        def _parse_on() -> t.Optional[exp.Expression]:
            # An ON entry is an expression, optionally restricted by IN (...).
            this = self._parse_bitwise()
            return self._parse_in(this) if self._match(TokenType.IN) else this

        this = self._parse_table()
        expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on)
        using = self._match(TokenType.USING) and self._parse_csv(
            lambda: self._parse_alias(self._parse_function())
        )
        group = self._parse_group()
        return self.expression(
            exp.Pivot, this=this, expressions=expressions, using=using, group=group
        )

    def _parse_pivot(self) -> t.Optional[exp.Pivot]:
        """Parse PIVOT/UNPIVOT (<aggs|cols> FOR <col> IN (...)) [alias],
        synthesizing output column names for the PIVOT case."""
        index = self._index

        if self._match(TokenType.PIVOT):
            unpivot = False
        elif self._match(TokenType.UNPIVOT):
            unpivot = True
        else:
            return None

        expressions = []
        field = None

        if not self._match(TokenType.L_PAREN):
            # PIVOT/UNPIVOT not followed by "(": not a pivot clause, rewind.
            self._retreat(index)
            return None

        if unpivot:
            expressions = self._parse_csv(self._parse_column)
        else:
            expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function()))

        if not expressions:
            self.raise_error("Failed to parse PIVOT's aggregation list")

        if not self._match(TokenType.FOR):
            self.raise_error("Expecting FOR")

        value = self._parse_column()

        if not self._match(TokenType.IN):
            self.raise_error("Expecting IN")

        field = self._parse_in(value, alias=True)

        self._match_r_paren()

        pivot = self.expression(exp.Pivot, expressions=expressions, field=field, unpivot=unpivot)

        # An alias only belongs to the last pivot of a chain.
        if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False):
            pivot.set("alias", self._parse_table_alias())

        if not unpivot:
            # Build the cross product of IN values x aggregation names; the
            # dialect flags control naming order and whether strings keep quotes.
            names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions))

            columns: t.List[exp.Expression] = []
            for fld in pivot.args["field"].expressions:
                field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name
                for name in names:
                    if self.PREFIXED_PIVOT_COLUMNS:
                        name = f"{name}_{field_name}" if name else field_name
                    else:
                        name = f"{field_name}_{name}" if name else field_name

                    columns.append(exp.to_identifier(name))

            pivot.set("columns", columns)

        return pivot

    def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]:
        """Names used for pivoted output columns; dialects may override this."""
        return [agg.alias for agg in aggregations]

    def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]:
        """Parse a WHERE clause, if present."""
        if not skip_where_token and not self._match(TokenType.WHERE):
            return None

        return self.expression(
            exp.Where, comments=self._prev_comments, this=self._parse_conjunction()
        )

    def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]:
        """Parse GROUP BY with expressions, GROUPING SETS, ROLLUP, CUBE and
        WITH TOTALS, accumulating repeated clauses until none match."""
        if not skip_group_by_token and not self._match(TokenType.GROUP_BY):
            return None

        elements = defaultdict(list)

        while True:
            expressions = self._parse_csv(self._parse_conjunction)
            if expressions:
                elements["expressions"].extend(expressions)

            grouping_sets = self._parse_grouping_sets()
            if grouping_sets:
                elements["grouping_sets"].extend(grouping_sets)

            rollup = None
            cube = None
            totals = None

            # WITH ROLLUP/CUBE store True; the standalone forms store a column list.
            with_ = self._match(TokenType.WITH)
            if self._match(TokenType.ROLLUP):
                rollup = with_ or self._parse_wrapped_csv(self._parse_column)
                elements["rollup"].extend(ensure_list(rollup))

            if
self._match(TokenType.CUBE): 2550 cube = with_ or self._parse_wrapped_csv(self._parse_column) 2551 elements["cube"].extend(ensure_list(cube)) 2552 2553 if self._match_text_seq("TOTALS"): 2554 totals = True 2555 elements["totals"] = True # type: ignore 2556 2557 if not (grouping_sets or rollup or cube or totals): 2558 break 2559 2560 return self.expression(exp.Group, **elements) # type: ignore 2561 2562 def _parse_grouping_sets(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]: 2563 if not self._match(TokenType.GROUPING_SETS): 2564 return None 2565 2566 return self._parse_wrapped_csv(self._parse_grouping_set) 2567 2568 def _parse_grouping_set(self) -> t.Optional[exp.Expression]: 2569 if self._match(TokenType.L_PAREN): 2570 grouping_set = self._parse_csv(self._parse_column) 2571 self._match_r_paren() 2572 return self.expression(exp.Tuple, expressions=grouping_set) 2573 2574 return self._parse_column() 2575 2576 def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]: 2577 if not skip_having_token and not self._match(TokenType.HAVING): 2578 return None 2579 return self.expression(exp.Having, this=self._parse_conjunction()) 2580 2581 def _parse_qualify(self) -> t.Optional[exp.Qualify]: 2582 if not self._match(TokenType.QUALIFY): 2583 return None 2584 return self.expression(exp.Qualify, this=self._parse_conjunction()) 2585 2586 def _parse_order( 2587 self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False 2588 ) -> t.Optional[exp.Expression]: 2589 if not skip_order_token and not self._match(TokenType.ORDER_BY): 2590 return this 2591 2592 return self.expression( 2593 exp.Order, this=this, expressions=self._parse_csv(self._parse_ordered) 2594 ) 2595 2596 def _parse_sort(self, exp_class: t.Type[E], *texts: str) -> t.Optional[E]: 2597 if not self._match_text_seq(*texts): 2598 return None 2599 return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered)) 2600 2601 def _parse_ordered(self) -> 
exp.Ordered: 2602 this = self._parse_conjunction() 2603 self._match(TokenType.ASC) 2604 2605 is_desc = self._match(TokenType.DESC) 2606 is_nulls_first = self._match_text_seq("NULLS", "FIRST") 2607 is_nulls_last = self._match_text_seq("NULLS", "LAST") 2608 desc = is_desc or False 2609 asc = not desc 2610 nulls_first = is_nulls_first or False 2611 explicitly_null_ordered = is_nulls_first or is_nulls_last 2612 2613 if ( 2614 not explicitly_null_ordered 2615 and ( 2616 (asc and self.NULL_ORDERING == "nulls_are_small") 2617 or (desc and self.NULL_ORDERING != "nulls_are_small") 2618 ) 2619 and self.NULL_ORDERING != "nulls_are_last" 2620 ): 2621 nulls_first = True 2622 2623 return self.expression(exp.Ordered, this=this, desc=desc, nulls_first=nulls_first) 2624 2625 def _parse_limit( 2626 self, this: t.Optional[exp.Expression] = None, top: bool = False 2627 ) -> t.Optional[exp.Expression]: 2628 if self._match(TokenType.TOP if top else TokenType.LIMIT): 2629 limit_paren = self._match(TokenType.L_PAREN) 2630 limit_exp = self.expression( 2631 exp.Limit, this=this, expression=self._parse_number() if top else self._parse_term() 2632 ) 2633 2634 if limit_paren: 2635 self._match_r_paren() 2636 2637 return limit_exp 2638 2639 if self._match(TokenType.FETCH): 2640 direction = self._match_set((TokenType.FIRST, TokenType.NEXT)) 2641 direction = self._prev.text if direction else "FIRST" 2642 2643 count = self._parse_number() 2644 percent = self._match(TokenType.PERCENT) 2645 2646 self._match_set((TokenType.ROW, TokenType.ROWS)) 2647 2648 only = self._match_text_seq("ONLY") 2649 with_ties = self._match_text_seq("WITH", "TIES") 2650 2651 if only and with_ties: 2652 self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause") 2653 2654 return self.expression( 2655 exp.Fetch, 2656 direction=direction, 2657 count=count, 2658 percent=percent, 2659 with_ties=with_ties, 2660 ) 2661 2662 return this 2663 2664 def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> 
t.Optional[exp.Expression]: 2665 if not self._match_set((TokenType.OFFSET, TokenType.COMMA)): 2666 return this 2667 2668 count = self._parse_number() 2669 self._match_set((TokenType.ROW, TokenType.ROWS)) 2670 return self.expression(exp.Offset, this=this, expression=count) 2671 2672 def _parse_locks(self) -> t.List[exp.Lock]: 2673 locks = [] 2674 while True: 2675 if self._match_text_seq("FOR", "UPDATE"): 2676 update = True 2677 elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq( 2678 "LOCK", "IN", "SHARE", "MODE" 2679 ): 2680 update = False 2681 else: 2682 break 2683 2684 expressions = None 2685 if self._match_text_seq("OF"): 2686 expressions = self._parse_csv(lambda: self._parse_table(schema=True)) 2687 2688 wait: t.Optional[bool | exp.Expression] = None 2689 if self._match_text_seq("NOWAIT"): 2690 wait = True 2691 elif self._match_text_seq("WAIT"): 2692 wait = self._parse_primary() 2693 elif self._match_text_seq("SKIP", "LOCKED"): 2694 wait = False 2695 2696 locks.append( 2697 self.expression(exp.Lock, update=update, expressions=expressions, wait=wait) 2698 ) 2699 2700 return locks 2701 2702 def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 2703 if not self._match_set(self.SET_OPERATIONS): 2704 return this 2705 2706 token_type = self._prev.token_type 2707 2708 if token_type == TokenType.UNION: 2709 expression = exp.Union 2710 elif token_type == TokenType.EXCEPT: 2711 expression = exp.Except 2712 else: 2713 expression = exp.Intersect 2714 2715 return self.expression( 2716 expression, 2717 this=this, 2718 distinct=self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL), 2719 expression=self._parse_set_operations(self._parse_select(nested=True)), 2720 ) 2721 2722 def _parse_expression(self) -> t.Optional[exp.Expression]: 2723 return self._parse_alias(self._parse_conjunction()) 2724 2725 def _parse_conjunction(self) -> t.Optional[exp.Expression]: 2726 return 
self._parse_tokens(self._parse_equality, self.CONJUNCTION) 2727 2728 def _parse_equality(self) -> t.Optional[exp.Expression]: 2729 return self._parse_tokens(self._parse_comparison, self.EQUALITY) 2730 2731 def _parse_comparison(self) -> t.Optional[exp.Expression]: 2732 return self._parse_tokens(self._parse_range, self.COMPARISON) 2733 2734 def _parse_range(self) -> t.Optional[exp.Expression]: 2735 this = self._parse_bitwise() 2736 negate = self._match(TokenType.NOT) 2737 2738 if self._match_set(self.RANGE_PARSERS): 2739 expression = self.RANGE_PARSERS[self._prev.token_type](self, this) 2740 if not expression: 2741 return this 2742 2743 this = expression 2744 elif self._match(TokenType.ISNULL): 2745 this = self.expression(exp.Is, this=this, expression=exp.Null()) 2746 2747 # Postgres supports ISNULL and NOTNULL for conditions. 2748 # https://blog.andreiavram.ro/postgresql-null-composite-type/ 2749 if self._match(TokenType.NOTNULL): 2750 this = self.expression(exp.Is, this=this, expression=exp.Null()) 2751 this = self.expression(exp.Not, this=this) 2752 2753 if negate: 2754 this = self.expression(exp.Not, this=this) 2755 2756 if self._match(TokenType.IS): 2757 this = self._parse_is(this) 2758 2759 return this 2760 2761 def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 2762 index = self._index - 1 2763 negate = self._match(TokenType.NOT) 2764 2765 if self._match_text_seq("DISTINCT", "FROM"): 2766 klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ 2767 return self.expression(klass, this=this, expression=self._parse_expression()) 2768 2769 expression = self._parse_null() or self._parse_boolean() 2770 if not expression: 2771 self._retreat(index) 2772 return None 2773 2774 this = self.expression(exp.Is, this=this, expression=expression) 2775 return self.expression(exp.Not, this=this) if negate else this 2776 2777 def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In: 2778 unnest = self._parse_unnest() 
2779 if unnest: 2780 this = self.expression(exp.In, this=this, unnest=unnest) 2781 elif self._match(TokenType.L_PAREN): 2782 expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias)) 2783 2784 if len(expressions) == 1 and isinstance(expressions[0], exp.Subqueryable): 2785 this = self.expression(exp.In, this=this, query=expressions[0]) 2786 else: 2787 this = self.expression(exp.In, this=this, expressions=expressions) 2788 2789 self._match_r_paren(this) 2790 else: 2791 this = self.expression(exp.In, this=this, field=self._parse_field()) 2792 2793 return this 2794 2795 def _parse_between(self, this: exp.Expression) -> exp.Between: 2796 low = self._parse_bitwise() 2797 self._match(TokenType.AND) 2798 high = self._parse_bitwise() 2799 return self.expression(exp.Between, this=this, low=low, high=high) 2800 2801 def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 2802 if not self._match(TokenType.ESCAPE): 2803 return this 2804 return self.expression(exp.Escape, this=this, expression=self._parse_string()) 2805 2806 def _parse_interval(self) -> t.Optional[exp.Interval]: 2807 if not self._match(TokenType.INTERVAL): 2808 return None 2809 2810 this = self._parse_primary() or self._parse_term() 2811 unit = self._parse_function() or self._parse_var() 2812 2813 # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse 2814 # each INTERVAL expression into this canonical form so it's easy to transpile 2815 if this and this.is_number: 2816 this = exp.Literal.string(this.name) 2817 elif this and this.is_string: 2818 parts = this.name.split() 2819 2820 if len(parts) == 2: 2821 if unit: 2822 # this is not actually a unit, it's something else 2823 unit = None 2824 self._retreat(self._index - 1) 2825 else: 2826 this = exp.Literal.string(parts[0]) 2827 unit = self.expression(exp.Var, this=parts[1]) 2828 2829 return self.expression(exp.Interval, this=this, unit=unit) 2830 2831 def _parse_bitwise(self) -> 
t.Optional[exp.Expression]: 2832 this = self._parse_term() 2833 2834 while True: 2835 if self._match_set(self.BITWISE): 2836 this = self.expression( 2837 self.BITWISE[self._prev.token_type], this=this, expression=self._parse_term() 2838 ) 2839 elif self._match_pair(TokenType.LT, TokenType.LT): 2840 this = self.expression( 2841 exp.BitwiseLeftShift, this=this, expression=self._parse_term() 2842 ) 2843 elif self._match_pair(TokenType.GT, TokenType.GT): 2844 this = self.expression( 2845 exp.BitwiseRightShift, this=this, expression=self._parse_term() 2846 ) 2847 else: 2848 break 2849 2850 return this 2851 2852 def _parse_term(self) -> t.Optional[exp.Expression]: 2853 return self._parse_tokens(self._parse_factor, self.TERM) 2854 2855 def _parse_factor(self) -> t.Optional[exp.Expression]: 2856 return self._parse_tokens(self._parse_unary, self.FACTOR) 2857 2858 def _parse_unary(self) -> t.Optional[exp.Expression]: 2859 if self._match_set(self.UNARY_PARSERS): 2860 return self.UNARY_PARSERS[self._prev.token_type](self) 2861 return self._parse_at_time_zone(self._parse_type()) 2862 2863 def _parse_type(self) -> t.Optional[exp.Expression]: 2864 interval = self._parse_interval() 2865 if interval: 2866 return interval 2867 2868 index = self._index 2869 data_type = self._parse_types(check_func=True) 2870 this = self._parse_column() 2871 2872 if data_type: 2873 if isinstance(this, exp.Literal): 2874 parser = self.TYPE_LITERAL_PARSERS.get(data_type.this) 2875 if parser: 2876 return parser(self, this, data_type) 2877 return self.expression(exp.Cast, this=this, to=data_type) 2878 if not data_type.expressions: 2879 self._retreat(index) 2880 return self._parse_column() 2881 return self._parse_column_ops(data_type) 2882 2883 return this 2884 2885 def _parse_type_size(self) -> t.Optional[exp.DataTypeSize]: 2886 this = self._parse_type() 2887 if not this: 2888 return None 2889 2890 return self.expression( 2891 exp.DataTypeSize, this=this, expression=self._parse_var(any_token=True) 2892 ) 
    def _parse_types(
        self, check_func: bool = False, schema: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse a (possibly nested or parameterized) type into an exp.DataType.

        Args:
            check_func: when True, reject a parenthesized form that could
                instead be a function call (see the `maybe_func` check below).
            schema: propagated into nested / struct member type parsing.
        """
        index = self._index

        # Optional SYSUDTLIB. qualifier (Teradata's UDT library schema).
        prefix = self._match_text_seq("SYSUDTLIB", ".")

        if not self._match_set(self.TYPE_TOKENS):
            return None

        type_token = self._prev.token_type

        if type_token == TokenType.PSEUDO_TYPE:
            return self.expression(exp.PseudoType, this=self._prev.text)

        nested = type_token in self.NESTED_TYPE_TOKENS
        is_struct = type_token == TokenType.STRUCT
        expressions = None
        maybe_func = False

        if self._match(TokenType.L_PAREN):
            if is_struct:
                expressions = self._parse_csv(self._parse_struct_types)
            elif nested:
                expressions = self._parse_csv(
                    lambda: self._parse_types(check_func=check_func, schema=schema)
                )
            else:
                expressions = self._parse_csv(self._parse_type_size)

            if not expressions or not self._match(TokenType.R_PAREN):
                self._retreat(index)
                return None

            # TYPE(...) might actually be a function call; double-checked below.
            maybe_func = True

        if self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET):
            # Postgres-style array suffixes: INT[], INT[][], ...
            this = exp.DataType(
                this=exp.DataType.Type.ARRAY,
                expressions=[exp.DataType.build(type_token.value, expressions=expressions)],
                nested=True,
            )

            while self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET):
                this = exp.DataType(this=exp.DataType.Type.ARRAY, expressions=[this], nested=True)

            return this

        if self._match(TokenType.L_BRACKET):
            # A lone, unclosed `[` can't be part of a type: rewind and give up.
            self._retreat(index)
            return None

        values: t.Optional[t.List[t.Optional[exp.Expression]]] = None
        if nested and self._match(TokenType.LT):
            # Angle-bracket syntax for nested types, e.g. ARRAY<INT>.
            if is_struct:
                expressions = self._parse_csv(self._parse_struct_types)
            else:
                expressions = self._parse_csv(
                    lambda: self._parse_types(check_func=check_func, schema=schema)
                )

            if not self._match(TokenType.GT):
                self.raise_error("Expecting >")

            # Optional literal values following the type, wrapped in [] or ().
            if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)):
                values = self._parse_csv(self._parse_conjunction)
                self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN))

        value: t.Optional[exp.Expression] = None
        if type_token in self.TIMESTAMPS:
            # Normalize the WITH [LOCAL] TIME ZONE / WITHOUT TIME ZONE variants.
            if self._match_text_seq("WITH", "TIME", "ZONE") or type_token == TokenType.TIMESTAMPTZ:
                value = exp.DataType(this=exp.DataType.Type.TIMESTAMPTZ, expressions=expressions)
            elif (
                self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE")
                or type_token == TokenType.TIMESTAMPLTZ
            ):
                value = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions)
            elif self._match_text_seq("WITHOUT", "TIME", "ZONE"):
                if type_token == TokenType.TIME:
                    value = exp.DataType(this=exp.DataType.Type.TIME, expressions=expressions)
                else:
                    value = exp.DataType(this=exp.DataType.Type.TIMESTAMP, expressions=expressions)

            # A resolved timestamp variant can no longer be a function call.
            maybe_func = maybe_func and value is None

            if value is None:
                value = exp.DataType(this=exp.DataType.Type.TIMESTAMP, expressions=expressions)
        elif type_token == TokenType.INTERVAL:
            unit = self._parse_var()

            if not unit:
                value = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL)
            else:
                value = self.expression(exp.Interval, unit=unit)

        if maybe_func and check_func:
            index2 = self._index
            peek = self._parse_string()

            if not peek:
                # No following string literal: treat the parenthesized form as
                # a function call rather than a type, and rewind entirely.
                self._retreat(index)
                return None

            self._retreat(index2)

        if value:
            return value

        return exp.DataType(
            this=exp.DataType.Type[type_token.value.upper()],
            expressions=expressions,
            nested=nested,
            values=values,
            prefix=prefix,
        )

    def _parse_struct_types(self) -> t.Optional[exp.Expression]:
        """Parse one `name: type` (or bare type) member of a STRUCT definition."""
        this = self._parse_type() or self._parse_id_var()
        self._match(TokenType.COLON)
        return self._parse_column_def(this)

    def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse an optional `AT TIME ZONE <expr>` suffix; returns `this` if absent."""
        if not self._match_text_seq("AT", "TIME", "ZONE"):
            return this
        return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary())

    def _parse_column(self) -> t.Optional[exp.Expression]:
        """Parse a column reference, including trailing dots, casts and brackets."""
        this = self._parse_field()
        if isinstance(this, exp.Identifier):
            this = self.expression(exp.Column, this=this)
        elif not this:
            return self._parse_bracket(this)
        return self._parse_column_ops(this)

    def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Apply trailing column operators (::, dots, brackets, ...) to `this`."""
        this = self._parse_bracket(this)

        while self._match_set(self.COLUMN_OPERATORS):
            op_token = self._prev.token_type
            op = self.COLUMN_OPERATORS.get(op_token)

            if op_token == TokenType.DCOLON:
                # expr::type cast syntax.
                field = self._parse_types()
                if not field:
                    self.raise_error("Expected type")
            elif op and self._curr:
                # Other registered operators take the next token as a literal.
                self._advance()
                value = self._prev.text
                field = (
                    exp.Literal.number(value)
                    if self._prev.token_type == TokenType.NUMBER
                    else exp.Literal.string(value)
                )
            else:
                field = self._parse_field(anonymous_func=True)

            if isinstance(field, exp.Func):
                # bigquery allows function calls like x.y.count(...)
                # SAFE.SUBSTR(...)
                # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules
                this = self._replace_columns_with_dots(this)

            if op:
                this = op(self, this, field)
            elif isinstance(this, exp.Column) and not this.args.get("catalog"):
                # Shift the accumulated parts one level outward:
                # column -> table -> db -> catalog.
                this = self.expression(
                    exp.Column,
                    this=field,
                    table=this.this,
                    db=this.args.get("table"),
                    catalog=this.args.get("db"),
                )
            else:
                this = self.expression(exp.Dot, this=this, expression=field)
            this = self._parse_bracket(this)
        return this

    def _parse_primary(self) -> t.Optional[exp.Expression]:
        """Parse a primary expression: a literal, implicit string concatenation,
        a leading-dot decimal, or a parenthesized expression/subquery/tuple."""
        if self._match_set(self.PRIMARY_PARSERS):
            token_type = self._prev.token_type
            primary = self.PRIMARY_PARSERS[token_type](self, self._prev)

            if token_type == TokenType.STRING:
                # Adjacent string literals concatenate: 'a' 'b' -> CONCAT('a', 'b').
                expressions = [primary]
                while self._match(TokenType.STRING):
                    expressions.append(exp.Literal.string(self._prev.text))

                if len(expressions) > 1:
                    return self.expression(exp.Concat, expressions=expressions)

            return primary

        if self._match_pair(TokenType.DOT, TokenType.NUMBER):
            # Leading-dot decimals: .5 -> 0.5
            return exp.Literal.number(f"0.{self._prev.text}")

        if self._match(TokenType.L_PAREN):
            comments = self._prev_comments
            query = self._parse_select()

            if query:
                expressions = [query]
            else:
                expressions = self._parse_csv(self._parse_expression)

            this = self._parse_query_modifiers(seq_get(expressions, 0))

            if isinstance(this, exp.Subqueryable):
                this = self._parse_set_operations(
                    self._parse_subquery(this=this, parse_alias=False)
                )
            elif len(expressions) > 1:
                this = self.expression(exp.Tuple, expressions=expressions)
            else:
                this = self.expression(exp.Paren, this=self._parse_set_operations(this))

            if this:
                # Attach comments captured at the opening paren.
                this.add_comments(comments)

            self._match_r_paren(expression=this)
            return this

        return None
    def _parse_field(
        self,
        any_token: bool = False,
        tokens: t.Optional[t.Collection[TokenType]] = None,
        anonymous_func: bool = False,
    ) -> t.Optional[exp.Expression]:
        """Parse a field: a primary expression, a function call, or an identifier."""
        return (
            self._parse_primary()
            or self._parse_function(anonymous=anonymous_func)
            or self._parse_id_var(any_token=any_token, tokens=tokens)
        )

    def _parse_function(
        self, functions: t.Optional[t.Dict[str, t.Callable]] = None, anonymous: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse a function call, dispatching to dedicated parsers when registered.

        Args:
            functions: name -> builder mapping to use; defaults to self.FUNCTIONS.
            anonymous: when True, skip dedicated/known-function builders and
                produce an exp.Anonymous node instead.
        """
        if not self._curr:
            return None

        token_type = self._curr.token_type

        # Constructs parsed like functions but written without parentheses.
        if self._match_set(self.NO_PAREN_FUNCTION_PARSERS):
            return self.NO_PAREN_FUNCTION_PARSERS[token_type](self)

        if not self._next or self._next.token_type != TokenType.L_PAREN:
            if token_type in self.NO_PAREN_FUNCTIONS:
                self._advance()
                return self.expression(self.NO_PAREN_FUNCTIONS[token_type])

            return None

        if token_type not in self.FUNC_TOKENS:
            return None

        this = self._curr.text
        upper = this.upper()
        self._advance(2)  # consume the function name and the opening paren

        parser = self.FUNCTION_PARSERS.get(upper)

        if parser and not anonymous:
            this = parser(self)
        else:
            subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type)

            # e.g. EXISTS(SELECT ...) / ANY(WITH ... SELECT ...).
            if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH):
                this = self.expression(subquery_predicate, this=self._parse_select())
                self._match_r_paren()
                return this

            if functions is None:
                functions = self.FUNCTIONS

            function = functions.get(upper)

            alias = upper in self.FUNCTIONS_WITH_ALIASED_ARGS
            args = self._parse_csv(lambda: self._parse_lambda(alias=alias))

            if function and not anonymous:
                this = self.validate_expression(function(args), args)
            else:
                # Unknown (or forced-anonymous) functions become exp.Anonymous.
                this = self.expression(exp.Anonymous, this=this, expressions=args)

        self._match_r_paren(this)
        # A window specification may follow any function call.
        return self._parse_window(this)

    def _parse_function_parameter(self) -> t.Optional[exp.Expression]:
        """Parse one parameter of a function definition (name plus optional type)."""
        return self._parse_column_def(self._parse_id_var())

    def _parse_user_defined_function(
        self, kind: t.Optional[TokenType] = None
    ) -> t.Optional[exp.Expression]:
        """Parse a possibly dotted UDF name and its parenthesized parameter list."""
        this = self._parse_id_var()

        while self._match(TokenType.DOT):
            this = self.expression(exp.Dot, this=this, expression=self._parse_id_var())

        if not self._match(TokenType.L_PAREN):
            return this

        expressions = self._parse_csv(self._parse_function_parameter)
        self._match_r_paren()
        return self.expression(
            exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True
        )

    def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier:
        """Parse an introducer followed by a literal (e.g. MySQL `_utf8'abc'`);
        with no literal, the token is treated as a plain identifier."""
        literal = self._parse_primary()
        if literal:
            return self.expression(exp.Introducer, this=token.text, expression=literal)

        return self.expression(exp.Identifier, this=token.text)

    def _parse_session_parameter(self) -> exp.SessionParameter:
        """Parse a session parameter reference, optionally qualified as kind.name."""
        kind = None
        this = self._parse_id_var() or self._parse_primary()

        if this and self._match(TokenType.DOT):
            kind = this.name
            this = self._parse_var() or self._parse_primary()

        return self.expression(exp.SessionParameter, this=this, kind=kind)

    def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]:
        """Parse a function argument: a lambda ((x, y) -> expr), a DISTINCT
        argument list, or a regular expression with optional ORDER BY/LIMIT."""
        index = self._index

        if self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(self._parse_id_var)

            if not self._match(TokenType.R_PAREN):
                self._retreat(index)
        else:
            expressions = [self._parse_id_var()]

        if self._match_set(self.LAMBDAS):
            return self.LAMBDAS[self._prev.token_type](self, expressions)

        # Not a lambda after all: rewind and parse a regular argument.
        self._retreat(index)

        this: t.Optional[exp.Expression]

        if self._match(TokenType.DISTINCT):
            this = self.expression(
                exp.Distinct, expressions=self._parse_csv(self._parse_conjunction)
            )
        else:
            this = self._parse_select_or_expression(alias=alias)

        if isinstance(this, exp.EQ):
            left = this.this
            if isinstance(left, exp.Column):
                # Keyword-style arguments (name = value): the left-hand side is
                # a variable name, not a column reference.
                left.replace(exp.var(left.text("this")))

        return self._parse_limit(self._parse_order(self._parse_respect_or_ignore_nulls(this)))

    def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        """Parse a parenthesized schema (column defs / constraints) attached to `this`."""
        index = self._index

        if not self.errors:
            try:
                if self._parse_select(nested=True):
                    # The parenthesized content is a SELECT, not a schema.
                    return this
            except ParseError:
                pass
            finally:
                # The speculative SELECT parse must not leak errors or position.
                self.errors.clear()
                self._retreat(index)

        if not self._match(TokenType.L_PAREN):
            return this

        args = self._parse_csv(
            lambda: self._parse_constraint()
            or self._parse_column_def(self._parse_field(any_token=True))
        )

        self._match_r_paren()
        return self.expression(exp.Schema, this=this, expressions=args)

    def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse a column definition: name, optional type, and constraints."""
        # column defs are not really columns, they're identifiers
        if isinstance(this, exp.Column):
            this = this.this

        kind = self._parse_types(schema=True)

        if self._match_text_seq("FOR", "ORDINALITY"):
            return self.expression(exp.ColumnDef, this=this, ordinality=True)

        constraints = []
        while True:
            constraint = self._parse_column_constraint()
            if not constraint:
                break
            constraints.append(constraint)

        # With neither a type nor constraints there's nothing to wrap.
        if not kind and not constraints:
            return this

        return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints)

    def _parse_auto_increment(
        self,
    ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint:
        """Parse AUTO_INCREMENT, optionally with (start, increment) or START/INCREMENT."""
        start = None
        increment = None

        if self._match(TokenType.L_PAREN, advance=False):
            args = self._parse_wrapped_csv(self._parse_bitwise)
            start = seq_get(args, 0)
            increment = seq_get(args, 1)
        elif self._match_text_seq("START"):
            start = self._parse_bitwise()
            self._match_text_seq("INCREMENT")
            increment = self._parse_bitwise()

        # Only a fully-specified start/increment pair becomes an identity constraint.
        if start and increment:
            return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment)

        return exp.AutoIncrementColumnConstraint()

    def _parse_compress(self) -> exp.CompressColumnConstraint:
        """Parse a COMPRESS column constraint: a wrapped value list or one value."""
        if self._match(TokenType.L_PAREN, advance=False):
            return self.expression(
                exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise)
            )

        return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise())

    def _parse_generated_as_identity(self) -> exp.GeneratedAsIdentityColumnConstraint:
        """Parse GENERATED {ALWAYS | BY DEFAULT} AS IDENTITY (...) and its options."""
        if self._match_text_seq("BY", "DEFAULT"):
            on_null = self._match_pair(TokenType.ON, TokenType.NULL)
            this = self.expression(
                exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null
            )
        else:
            self._match_text_seq("ALWAYS")
            this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True)

        self._match(TokenType.ALIAS)
        identity = self._match_text_seq("IDENTITY")

        if self._match(TokenType.L_PAREN):
            if self._match_text_seq("START", "WITH"):
                this.set("start", self._parse_bitwise())
            if self._match_text_seq("INCREMENT", "BY"):
                this.set("increment", self._parse_bitwise())
            if self._match_text_seq("MINVALUE"):
                this.set("minvalue", self._parse_bitwise())
            if self._match_text_seq("MAXVALUE"):
                this.set("maxvalue", self._parse_bitwise())

            if self._match_text_seq("CYCLE"):
                this.set("cycle", True)
            elif self._match_text_seq("NO", "CYCLE"):
                this.set("cycle", False)

            if not identity:
                # GENERATED ... AS (<expr>): a computed expression, not an identity.
                this.set("expression", self._parse_bitwise())

            self._match_r_paren()

        return this
    def _parse_inline(self) -> exp.InlineLengthColumnConstraint:
        """Parse an INLINE [LENGTH] column constraint."""
        self._match_text_seq("LENGTH")
        return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise())

    def _parse_not_constraint(
        self,
    ) -> t.Optional[exp.NotNullColumnConstraint | exp.CaseSpecificColumnConstraint]:
        """Parse the tail of a NOT ... constraint: NOT NULL or NOT CASESPECIFIC."""
        if self._match_text_seq("NULL"):
            return self.expression(exp.NotNullColumnConstraint)
        if self._match_text_seq("CASESPECIFIC"):
            return self.expression(exp.CaseSpecificColumnConstraint, not_=True)
        return None

    def _parse_column_constraint(self) -> t.Optional[exp.Expression]:
        """Parse one column constraint, optionally named via CONSTRAINT <name>."""
        if self._match(TokenType.CONSTRAINT):
            this = self._parse_id_var()
        else:
            this = None

        if self._match_texts(self.CONSTRAINT_PARSERS):
            return self.expression(
                exp.ColumnConstraint,
                this=this,
                kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self),
            )

        return this

    def _parse_constraint(self) -> t.Optional[exp.Expression]:
        """Parse a table constraint; unnamed ones use the reduced schema-level set."""
        if not self._match(TokenType.CONSTRAINT):
            return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS)

        this = self._parse_id_var()
        expressions = []

        while True:
            constraint = self._parse_unnamed_constraint() or self._parse_function()
            if not constraint:
                break
            expressions.append(constraint)

        return self.expression(exp.Constraint, this=this, expressions=expressions)

    def _parse_unnamed_constraint(
        self, constraints: t.Optional[t.Collection[str]] = None
    ) -> t.Optional[exp.Expression]:
        """Match a constraint keyword from `constraints` and dispatch to its parser."""
        if not self._match_texts(constraints or self.CONSTRAINT_PARSERS):
            return None

        constraint = self._prev.text.upper()
        if constraint not in self.CONSTRAINT_PARSERS:
            self.raise_error(f"No parser found for schema constraint {constraint}.")

        return self.CONSTRAINT_PARSERS[constraint](self)

    def _parse_unique(self) -> exp.UniqueColumnConstraint:
        """Parse UNIQUE [KEY] with an optional column list."""
        self._match_text_seq("KEY")
        return self.expression(
            exp.UniqueColumnConstraint, this=self._parse_schema(self._parse_id_var(any_token=False))
        )

    def _parse_key_constraint_options(self) -> t.List[str]:
        """Collect trailing key-constraint options as plain strings
        (ON <event> <action>, DEFERRABLE, NOT ENFORCED, etc.)."""
        options = []
        while True:
            if not self._curr:
                break

            if self._match(TokenType.ON):
                action = None
                # The token after ON names the event (e.g. DELETE/UPDATE).
                on = self._advance_any() and self._prev.text

                if self._match_text_seq("NO", "ACTION"):
                    action = "NO ACTION"
                elif self._match_text_seq("CASCADE"):
                    action = "CASCADE"
                elif self._match_pair(TokenType.SET, TokenType.NULL):
                    action = "SET NULL"
                elif self._match_pair(TokenType.SET, TokenType.DEFAULT):
                    action = "SET DEFAULT"
                else:
                    self.raise_error("Invalid key constraint")

                options.append(f"ON {on} {action}")
            elif self._match_text_seq("NOT", "ENFORCED"):
                options.append("NOT ENFORCED")
            elif self._match_text_seq("DEFERRABLE"):
                options.append("DEFERRABLE")
            elif self._match_text_seq("INITIALLY", "DEFERRED"):
                options.append("INITIALLY DEFERRED")
            elif self._match_text_seq("NORELY"):
                options.append("NORELY")
            elif self._match_text_seq("MATCH", "FULL"):
                options.append("MATCH FULL")
            else:
                break

        return options

    def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]:
        """Parse a REFERENCES clause: target table, optional columns, and options.

        Args:
            match: when False, assume the REFERENCES token was already consumed.
        """
        if match and not self._match(TokenType.REFERENCES):
            return None

        expressions = None
        this = self._parse_id_var()

        if self._match(TokenType.L_PAREN, advance=False):
            expressions = self._parse_wrapped_id_vars()

        options = self._parse_key_constraint_options()
        return self.expression(exp.Reference, this=this, expressions=expressions, options=options)

    def _parse_foreign_key(self) -> exp.ForeignKey:
        """Parse a FOREIGN KEY constraint with its ON DELETE / ON UPDATE actions."""
        expressions = self._parse_wrapped_id_vars()
        reference = self._parse_references()
        options = {}

        while self._match(TokenType.ON):
            if not self._match_set((TokenType.DELETE, TokenType.UPDATE)):
                self.raise_error("Expected DELETE or UPDATE")

            kind = self._prev.text.lower()

            if self._match_text_seq("NO", "ACTION"):
                action = "NO ACTION"
            elif self._match(TokenType.SET):
                self._match_set((TokenType.NULL, TokenType.DEFAULT))
                action = "SET " + self._prev.text.upper()
            else:
                # Single-token actions like CASCADE/RESTRICT.
                self._advance()
                action = self._prev.text.upper()

            options[kind] = action

        return self.expression(
            exp.ForeignKey, expressions=expressions, reference=reference, **options  # type: ignore
        )

    def _parse_primary_key(
        self, wrapped_optional: bool = False, in_props: bool = False
    ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey:
        """Parse PRIMARY KEY as a column constraint, or — when a column list
        follows (or `in_props` is set) — as a table-level constraint."""
        desc = (
            self._match_set((TokenType.ASC, TokenType.DESC))
            and self._prev.token_type == TokenType.DESC
        )

        if not in_props and not self._match(TokenType.L_PAREN, advance=False):
            return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc)

        expressions = self._parse_wrapped_csv(self._parse_field, optional=wrapped_optional)
        options = self._parse_key_constraint_options()
        return self.expression(exp.PrimaryKey, expressions=expressions, options=options)

    def _parse_bracket(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse [...] subscripts / array literals and {...} struct literals."""
        if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)):
            return this

        bracket_kind = self._prev.token_type

        if self._match(TokenType.COLON):
            # A leading colon is an open-start slice: x[:n].
            expressions: t.List[t.Optional[exp.Expression]] = [
                self.expression(exp.Slice, expression=self._parse_conjunction())
            ]
        else:
            expressions = self._parse_csv(lambda: self._parse_slice(self._parse_conjunction()))

        # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs
        if bracket_kind == TokenType.L_BRACE:
            this = self.expression(exp.Struct, expressions=expressions)
        elif not this or this.name.upper() == "ARRAY":
            this = self.expression(exp.Array, expressions=expressions)
        else:
            # Index expressions are normalized by the dialect's index offset.
            expressions = apply_index_offset(this, expressions, -self.INDEX_OFFSET)
            this = self.expression(exp.Bracket, this=this, expressions=expressions)

        if not self._match(TokenType.R_BRACKET) and bracket_kind == TokenType.L_BRACKET:
            self.raise_error("Expected ]")
        elif not self._match(TokenType.R_BRACE) and bracket_kind == TokenType.L_BRACE:
            self.raise_error("Expected }")

        self._add_comments(this)
        # Handle chained subscripts, e.g. x[0][1].
        return self._parse_bracket(this)

    def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse an optional `:stop` suffix, turning `this` into a slice."""
        if self._match(TokenType.COLON):
            return self.expression(exp.Slice, this=this, expression=self._parse_conjunction())
        return this

    def _parse_case(self) -> t.Optional[exp.Expression]:
        """Parse CASE [operand] WHEN ... THEN ... [ELSE ...] END."""
        ifs = []
        default = None

        # Optional operand of the "simple" CASE form; None for searched CASE.
        expression = self._parse_conjunction()

        while self._match(TokenType.WHEN):
            this = self._parse_conjunction()
            self._match(TokenType.THEN)
            then = self._parse_conjunction()
            ifs.append(self.expression(exp.If, this=this, true=then))

        if self._match(TokenType.ELSE):
            default = self._parse_conjunction()

        if not self._match(TokenType.END):
            self.raise_error("Expected END after CASE", self._prev)

        return self._parse_window(
            self.expression(exp.Case, this=expression, ifs=ifs, default=default)
        )

    def _parse_if(self) -> t.Optional[exp.Expression]:
        """Parse IF as a function call IF(...) or as `IF cond THEN ... [ELSE ...] END`."""
        if self._match(TokenType.L_PAREN):
            args = self._parse_csv(self._parse_conjunction)
            this = self.validate_expression(exp.If.from_arg_list(args), args)
            self._match_r_paren()
        else:
            index = self._index - 1
            condition = self._parse_conjunction()

            if not condition:
                # Not an IF expression after all: rewind to before the IF token.
                self._retreat(index)
                return None

            self._match(TokenType.THEN)
            true = self._parse_conjunction()
            false = self._parse_conjunction() if self._match(TokenType.ELSE) else None
            self._match(TokenType.END)
            this = self.expression(exp.If, this=condition, true=true, false=false)

        return self._parse_window(this)

    def _parse_extract(self) -> exp.Extract:
        """Parse EXTRACT(part FROM expr); a comma may also separate the arguments."""
        this = self._parse_function() or self._parse_var() or self._parse_type()

        if self._match(TokenType.FROM):
            return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

        if not self._match(TokenType.COMMA):
            self.raise_error("Expected FROM or comma after EXTRACT", self._prev)

        return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

    def _parse_cast(self, strict: bool) -> exp.Expression:
        """Parse the interior of CAST(expr AS type [FORMAT fmt]).

        Args:
            strict: when False, produce exp.TryCast instead of exp.Cast.
        """
        this = self._parse_conjunction()

        if not self._match(TokenType.ALIAS):
            if self._match(TokenType.COMMA):
                # CAST(expr, 'type string') form.
                return self.expression(
                    exp.CastToStrType, this=this, expression=self._parse_string()
                )
            else:
                self.raise_error("Expected AS after CAST")

        to = self._parse_types()

        if not to:
            self.raise_error("Expected TYPE after CAST")
        elif to.this == exp.DataType.Type.CHAR:
            if self._match(TokenType.CHARACTER_SET):
                to = self.expression(exp.CharacterSet, this=self._parse_var_or_string())
        elif to.this in exp.DataType.TEMPORAL_TYPES and self._match(TokenType.FORMAT):
            # CAST(... AS <temporal> FORMAT ...) is rewritten into StrToDate/StrToTime.
            fmt = self._parse_string()

            return self.expression(
                exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime,
                this=this,
                format=exp.Literal.string(
                    format_time(
                        fmt.this if fmt else "",
                        self.FORMAT_MAPPING or self.TIME_MAPPING,
                        self.FORMAT_TRIE or self.TIME_TRIE,
                    )
                ),
            )

        return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to)

    def _parse_concat(self) -> t.Optional[exp.Expression]:
        args = self._parse_csv(self._parse_conjunction)
        if
self.CONCAT_NULL_OUTPUTS_STRING: 3639 args = [exp.func("COALESCE", arg, exp.Literal.string("")) for arg in args] 3640 3641 # Some dialects (e.g. Trino) don't allow a single-argument CONCAT call, so when 3642 # we find such a call we replace it with its argument. 3643 if len(args) == 1: 3644 return args[0] 3645 3646 return self.expression( 3647 exp.Concat if self.STRICT_STRING_CONCAT else exp.SafeConcat, expressions=args 3648 ) 3649 3650 def _parse_string_agg(self) -> exp.Expression: 3651 expression: t.Optional[exp.Expression] 3652 3653 if self._match(TokenType.DISTINCT): 3654 args = self._parse_csv(self._parse_conjunction) 3655 expression = self.expression(exp.Distinct, expressions=[seq_get(args, 0)]) 3656 else: 3657 args = self._parse_csv(self._parse_conjunction) 3658 expression = seq_get(args, 0) 3659 3660 index = self._index 3661 if not self._match(TokenType.R_PAREN): 3662 # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]]) 3663 order = self._parse_order(this=expression) 3664 return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1)) 3665 3666 # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]). 3667 # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that 3668 # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them. 
        if not self._match_text_seq("WITHIN", "GROUP"):
            # No WITHIN GROUP: rewind past the ")" and emit a plain GroupConcat.
            self._retreat(index)
            return self.validate_expression(exp.GroupConcat.from_arg_list(args), args)

        self._match_l_paren()  # The corresponding match_r_paren will be called in parse_function (caller)
        order = self._parse_order(this=expression)
        return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1))

    def _parse_convert(self, strict: bool) -> t.Optional[exp.Expression]:
        """Parse CONVERT(expr USING charset) / CONVERT(expr, type) into a (Try)Cast.

        Args:
            strict: If True build exp.Cast, otherwise exp.TryCast.
        """
        to: t.Optional[exp.Expression]
        this = self._parse_bitwise()

        if self._match(TokenType.USING):
            to = self.expression(exp.CharacterSet, this=self._parse_var())
        elif self._match(TokenType.COMMA):
            to = self._parse_bitwise()
        else:
            to = None

        # Swap the argument order if needed to produce the correct AST
        if self.CONVERT_TYPE_FIRST:
            this, to = to, this

        return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to)

    def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]:
        """
        There are generally two variants of the DECODE function:

        - DECODE(bin, charset)
        - DECODE(expression, search, result [, search, result] ... [, default])

        The second variant will always be parsed into a CASE expression. Note that NULL
        needs special treatment, since we need to explicitly check for it with `IS NULL`,
        instead of relying on pattern matching.
        """
        args = self._parse_csv(self._parse_conjunction)

        if len(args) < 3:
            # Two-argument form: DECODE(bin, charset).
            return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1))

        expression, *expressions = args
        if not expression:
            return None

        ifs = []
        # Pair up (search, result) arguments; a trailing unpaired arg is the default.
        for search, result in zip(expressions[::2], expressions[1::2]):
            if not search or not result:
                return None

            if isinstance(search, exp.Literal):
                ifs.append(
                    exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result)
                )
            elif isinstance(search, exp.Null):
                # NULL search values must compare with IS NULL, not equality.
                ifs.append(
                    exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result)
                )
            else:
                # Non-literal search: match on equality OR both sides being NULL.
                cond = exp.or_(
                    exp.EQ(this=expression.copy(), expression=search),
                    exp.and_(
                        exp.Is(this=expression.copy(), expression=exp.Null()),
                        exp.Is(this=search.copy(), expression=exp.Null()),
                        copy=False,
                    ),
                    copy=False,
                )
                ifs.append(exp.If(this=cond, true=result))

        return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None)

    def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]:
        """Parse a single JSON_OBJECT entry: [KEY] <key> [:|VALUE] <value>."""
        self._match_text_seq("KEY")
        key = self._parse_field()
        self._match(TokenType.COLON)
        self._match_text_seq("VALUE")
        value = self._parse_field()

        if not key and not value:
            return None
        return self.expression(exp.JSONKeyValue, this=key, expression=value)

    def _parse_json_object(self) -> exp.JSONObject:
        """Parse JSON_OBJECT(...) entries and its NULL / UNIQUE KEYS modifiers."""
        expressions = self._parse_csv(self._parse_json_key_value)

        null_handling = None
        if self._match_text_seq("NULL", "ON", "NULL"):
            null_handling = "NULL ON NULL"
        elif self._match_text_seq("ABSENT", "ON", "NULL"):
            null_handling = "ABSENT ON NULL"

        unique_keys = None
        if self._match_text_seq("WITH", "UNIQUE"):
            unique_keys = True
        elif self._match_text_seq("WITHOUT", "UNIQUE"):
            unique_keys = False
        self._match_text_seq("KEYS")

        return_type = self._match_text_seq("RETURNING") and self._parse_type()
        format_json = self._match_text_seq("FORMAT", "JSON")
        encoding = self._match_text_seq("ENCODING") and self._parse_var()

        return self.expression(
            exp.JSONObject,
            expressions=expressions,
            null_handling=null_handling,
            unique_keys=unique_keys,
            return_type=return_type,
            format_json=format_json,
            encoding=encoding,
        )

    def _parse_logarithm(self) -> exp.Func:
        """Parse LOG(...) arguments, honoring dialect base/argument-order settings."""
        # Default argument order is base, expression
        args = self._parse_csv(self._parse_range)

        if len(args) > 1:
            if not self.LOG_BASE_FIRST:
                args.reverse()
            return exp.Log.from_arg_list(args)

        # One-argument LOG: some dialects treat it as natural log.
        return self.expression(
            exp.Ln if self.LOG_DEFAULTS_TO_LN else exp.Log, this=seq_get(args, 0)
        )

    def _parse_match_against(self) -> exp.MatchAgainst:
        """Parse MySQL MATCH (cols) AGAINST ('expr' [modifier])."""
        expressions = self._parse_csv(self._parse_column)

        self._match_text_seq(")", "AGAINST", "(")

        this = self._parse_string()

        if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"):
            modifier = "IN NATURAL LANGUAGE MODE"
            if self._match_text_seq("WITH", "QUERY", "EXPANSION"):
                modifier = f"{modifier} WITH QUERY EXPANSION"
        elif self._match_text_seq("IN", "BOOLEAN", "MODE"):
            modifier = "IN BOOLEAN MODE"
        elif self._match_text_seq("WITH", "QUERY", "EXPANSION"):
            modifier = "WITH QUERY EXPANSION"
        else:
            modifier = None

        return self.expression(
            exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier
        )

    # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16
    def _parse_open_json(self) -> exp.OpenJSON:
        """Parse T-SQL OPENJSON(expr [, path]) [WITH (<column defs>)]."""
        this = self._parse_bitwise()
        path = self._match(TokenType.COMMA) and self._parse_string()

        def _parse_open_json_column_def() -> exp.OpenJSONColumnDef:
            # One WITH-clause column: <name> <type> ['path'] [AS JSON].
            this = self._parse_field(any_token=True)
            kind = self._parse_types()
            path = self._parse_string()
            as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON)

            return self.expression(
                exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json
            )

        expressions = None
        if self._match_pair(TokenType.R_PAREN, TokenType.WITH):
            self._match_l_paren()
            expressions = self._parse_csv(_parse_open_json_column_def)

        return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions)

    def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition:
        """Parse POSITION/LOCATE-style calls in both `(needle IN haystack)` and
        comma-separated argument forms.

        Args:
            haystack_first: If True, the comma form is (haystack, needle).
        """
        args = self._parse_csv(self._parse_bitwise)

        if self._match(TokenType.IN):
            return self.expression(
                exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0)
            )

        if haystack_first:
            haystack = seq_get(args, 0)
            needle = seq_get(args, 1)
        else:
            needle = seq_get(args, 0)
            haystack = seq_get(args, 1)

        return self.expression(
            exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2)
        )

    def _parse_join_hint(self, func_name: str) -> exp.JoinHint:
        """Parse a join hint's table list into a JoinHint node named after the hint."""
        args = self._parse_csv(self._parse_table)
        return exp.JoinHint(this=func_name.upper(), expressions=args)

    def _parse_substring(self) -> exp.Substring:
        # Postgres supports the form: substring(string [from int] [for int])
        # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6

        args = self._parse_csv(self._parse_bitwise)

        if self._match(TokenType.FROM):
            args.append(self._parse_bitwise())
            if self._match(TokenType.FOR):
                args.append(self._parse_bitwise())

        return self.validate_expression(exp.Substring.from_arg_list(args), args)

    def _parse_trim(self) -> exp.Trim:
        # https://www.w3resource.com/sql/character-functions/trim.php
        # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html
        position = None
        collation = None

        # Optional LEADING / TRAILING / BOTH specifier.
        if self._match_texts(self.TRIM_TYPES):
            position = self._prev.text.upper()

        expression = self._parse_bitwise()
        if self._match_set((TokenType.FROM, TokenType.COMMA)):
            # TRIM(<chars> FROM <target>): the first expression was the trim characters.
            this = self._parse_bitwise()
        else:
            this = expression
            expression = None

        if self._match(TokenType.COLLATE):
            collation = self._parse_bitwise()

        return self.expression(
            exp.Trim, this=this, position=position, expression=expression, collation=collation
        )

    def _parse_window_clause(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]:
        """Parse a WINDOW clause as a comma-separated list of named windows."""
        return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window)

    def _parse_named_window(self) -> t.Optional[exp.Expression]:
        """Parse one named window definition: <name> AS (<window spec>)."""
        return self._parse_window(self._parse_id_var(), alias=True)

    def _parse_respect_or_ignore_nulls(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        """Wrap `this` in IgnoreNulls/RespectNulls when those keywords follow."""
        if self._match_text_seq("IGNORE", "NULLS"):
            return self.expression(exp.IgnoreNulls, this=this)
        if self._match_text_seq("RESPECT", "NULLS"):
            return self.expression(exp.RespectNulls, this=this)
        return this

    def _parse_window(
        self, this: t.Optional[exp.Expression], alias: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse window-function trailers around `this`: FILTER, WITHIN GROUP,
        IGNORE/RESPECT NULLS and OVER (...); `alias=True` parses a named window
        definition instead (name AS (spec))."""
        if self._match_pair(TokenType.FILTER, TokenType.L_PAREN):
            this = self.expression(exp.Filter, this=this, expression=self._parse_where())
            self._match_r_paren()

        # T-SQL allows the OVER (...) syntax after WITHIN GROUP.
        # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16
        if self._match_text_seq("WITHIN", "GROUP"):
            order = self._parse_wrapped(self._parse_order)
            this = self.expression(exp.WithinGroup, this=this, expression=order)

        # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER
        # Some dialects choose to implement and some do not.
        # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html

        # There is some code above in _parse_lambda that handles
        # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ...

        # The below changes handle
        # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ...

        # Oracle allows both formats
        # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html)
        # and Snowflake chose to do the same for familiarity
        # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes
        this = self._parse_respect_or_ignore_nulls(this)

        # bigquery select from window x AS (partition by ...)
        if alias:
            over = None
            self._match(TokenType.ALIAS)
        elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS):
            # No OVER-like keyword follows: `this` is not windowed.
            return this
        else:
            over = self._prev.text.upper()

        if not self._match(TokenType.L_PAREN):
            # OVER <window name> (reference to a named window).
            return self.expression(
                exp.Window, this=this, alias=self._parse_id_var(False), over=over
            )

        window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS)

        first = self._match(TokenType.FIRST)
        if self._match_text_seq("LAST"):
            first = False

        partition = self._parse_partition_by()
        order = self._parse_order()
        kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text

        if kind:
            self._match(TokenType.BETWEEN)
            start = self._parse_window_spec()
            self._match(TokenType.AND)
            end = self._parse_window_spec()

            spec = self.expression(
                exp.WindowSpec,
                kind=kind,
                start=start["value"],
                start_side=start["side"],
                end=end["value"],
                end_side=end["side"],
            )
        else:
            spec = None

        self._match_r_paren()

        return self.expression(
            exp.Window,
            this=this,
            partition_by=partition,
            order=order,
            spec=spec,
            alias=window_alias,
            over=over,
            first=first,
        )

    def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]:
        """Parse one frame bound (e.g. UNBOUNDED PRECEDING) into value/side parts."""
        self._match(TokenType.BETWEEN)

        return {
            "value": (
                (self._match_text_seq("UNBOUNDED") and "UNBOUNDED")
                or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW")
                or self._parse_bitwise()
            ),
            "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text,
        }

    def _parse_alias(
        self, this: t.Optional[exp.Expression], explicit: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse an optional alias (or wrapped alias list) following `this`.

        Args:
            explicit: If True, only parse an alias when an AS token is present.
        """
        any_token = self._match(TokenType.ALIAS)

        if explicit and not any_token:
            return this

        if self._match(TokenType.L_PAREN):
            aliases = self.expression(
                exp.Aliases,
                this=this,
                expressions=self._parse_csv(lambda: self._parse_id_var(any_token)),
            )
            self._match_r_paren(aliases)
            return aliases

        alias = self._parse_id_var(any_token)

        if alias:
            return self.expression(exp.Alias, this=this, alias=alias)

        return this

    def _parse_id_var(
        self,
        any_token: bool = True,
        tokens: t.Optional[t.Collection[TokenType]] = None,
    ) -> t.Optional[exp.Expression]:
        """Parse an identifier-like name, optionally accepting any non-reserved token."""
        identifier = self._parse_identifier()

        if identifier:
            return identifier

        if (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS):
            quoted = self._prev.token_type == TokenType.STRING
            return exp.Identifier(this=self._prev.text, quoted=quoted)

        return None

    def _parse_string(self) -> t.Optional[exp.Expression]:
        """Parse a string literal, falling back to a placeholder."""
        if self._match(TokenType.STRING):
            return self.PRIMARY_PARSERS[TokenType.STRING](self, self._prev)
        return self._parse_placeholder()

    def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]:
        """Parse a string literal as a quoted identifier, or None if absent."""
        return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True)

    def _parse_number(self) -> t.Optional[exp.Expression]:
        """Parse a numeric literal, falling back to a placeholder."""
        if self._match(TokenType.NUMBER):
            return self.PRIMARY_PARSERS[TokenType.NUMBER](self, self._prev)
        return self._parse_placeholder()

    def _parse_identifier(self) -> t.Optional[exp.Expression]:
        """Parse an explicitly quoted identifier, falling back to a placeholder."""
        if self._match(TokenType.IDENTIFIER):
            return self.expression(exp.Identifier, this=self._prev.text, quoted=True)
        return self._parse_placeholder()

    def _parse_var(
        self, any_token: bool = False, tokens: t.Optional[t.Collection[TokenType]] = None
    ) -> t.Optional[exp.Expression]:
        """Parse a VAR token (or any/one of `tokens`) into a Var expression."""
        if (
            (any_token and self._advance_any())
            or self._match(TokenType.VAR)
            or (self._match_set(tokens) if tokens else False)
        ):
            return self.expression(exp.Var, this=self._prev.text)
        return self._parse_placeholder()

    def _advance_any(self) -> t.Optional[Token]:
        """Consume and return the current token unless it is a reserved keyword."""
        if self._curr and self._curr.token_type not in self.RESERVED_KEYWORDS:
            self._advance()
            return self._prev
        return None

    def _parse_var_or_string(self) -> t.Optional[exp.Expression]:
        """Parse a Var or, failing that, a string literal."""
        return self._parse_var() or self._parse_string()

    def _parse_null(self) -> t.Optional[exp.Expression]:
        """Parse a NULL literal, or None if the current token isn't NULL."""
        if self._match(TokenType.NULL):
            return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev)
        return None

    def _parse_boolean(self) -> t.Optional[exp.Expression]:
        """Parse a TRUE/FALSE literal, or None if neither is present."""
        if self._match(TokenType.TRUE):
            return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev)
        if self._match(TokenType.FALSE):
            return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev)
        return None

    def _parse_star(self) -> t.Optional[exp.Expression]:
        """Parse a `*` token, or None if the current token isn't a star."""
        if self._match(TokenType.STAR):
            return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev)
        return None

    def _parse_parameter(self) -> exp.Parameter:
        """Parse a parameter reference, optionally wrapped in braces (e.g. @{name})."""
        wrapped = self._match(TokenType.L_BRACE)
        this = self._parse_var() or self._parse_identifier() or self._parse_primary()
        self._match(TokenType.R_BRACE)
        return self.expression(exp.Parameter, this=this, wrapped=wrapped)

    def _parse_placeholder(self) -> t.Optional[exp.Expression]:
        """Parse a placeholder token (?, :name, etc.) via PLACEHOLDER_PARSERS."""
        if self._match_set(self.PLACEHOLDER_PARSERS):
            placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self)
            if placeholder:
                return placeholder
            # The sub-parser declined: give the consumed token back.
            self._advance(-1)
        return None

    def _parse_except(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]:
        """Parse an EXCEPT (col, ...) / EXCEPT col, ... column-exclusion list."""
        if not self._match(TokenType.EXCEPT):
            return None
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_column)
        return self._parse_csv(self._parse_column)

    def _parse_replace(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]:
        """Parse a REPLACE (expr, ...) / REPLACE expr, ... column-replacement list."""
        if not self._match(TokenType.REPLACE):
            return None
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_expression)
        return self._parse_csv(self._parse_expression)

    def _parse_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA
    ) -> t.List[t.Optional[exp.Expression]]:
        """Parse a `sep`-separated list using `parse_method`, dropping None results."""
        parse_result = parse_method()
        items = [parse_result] if parse_result is not None else []

        while self._match(sep):
            # Attach comments found around the separator to the previous item.
            self._add_comments(parse_result)
            parse_result = parse_method()
            if parse_result is not None:
                items.append(parse_result)

        return items

    def _parse_tokens(
        self, parse_method: t.Callable, expressions: t.Dict
    ) -> t.Optional[exp.Expression]:
        """Left-fold binary operators from `expressions` over `parse_method` operands."""
        this = parse_method()

        while self._match_set(expressions):
            this = self.expression(
                expressions[self._prev.token_type],
                this=this,
                comments=self._prev_comments,
                expression=parse_method(),
            )

        return this

    def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[t.Optional[exp.Expression]]:
        """Parse a parenthesized, comma-separated identifier list."""
        return self._parse_wrapped_csv(self._parse_id_var, optional=optional)

    def _parse_wrapped_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False
    ) -> t.List[t.Optional[exp.Expression]]:
        """Parse a parenthesized `sep`-separated list using `parse_method`."""
        return self._parse_wrapped(
            lambda: self._parse_csv(parse_method, sep=sep), optional=optional
        )

    def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any:
        """Run `parse_method` inside parentheses; `optional=True` allows them to be absent."""
        wrapped = self._match(TokenType.L_PAREN)
        if not wrapped and not optional:
            self.raise_error("Expecting (")
        parse_result = parse_method()
        if wrapped:
            self._match_r_paren()
        return parse_result

    def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]:
        """Parse a SELECT (with set operations) or a plain expression/conjunction."""
        return self._parse_select() or self._parse_set_operations(
            self._parse_expression() if alias else self._parse_conjunction()
        )

    def _parse_ddl_select(self) -> t.Optional[exp.Expression]:
        """Parse the SELECT body of a DDL statement (e.g. CREATE TABLE ... AS)."""
        return self._parse_query_modifiers(
            self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False))
        )

    def _parse_transaction(self) -> exp.Transaction:
        """Parse BEGIN/START [kind] [TRANSACTION|WORK] with optional mode lists."""
        this = None
        if self._match_texts(self.TRANSACTION_KIND):
            this = self._prev.text

        self._match_texts({"TRANSACTION", "WORK"})

        modes = []
        while True:
            # A mode can be several VAR tokens, e.g. ISOLATION LEVEL READ COMMITTED.
            mode = []
            while self._match(TokenType.VAR):
                mode.append(self._prev.text)

            if mode:
                modes.append(" ".join(mode))
            if not self._match(TokenType.COMMA):
                break

        return self.expression(exp.Transaction, this=this, modes=modes)

    def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback:
        """Parse COMMIT/ROLLBACK with optional TO SAVEPOINT and AND [NO] CHAIN."""
        chain = None
        savepoint = None
        # The COMMIT/ROLLBACK keyword was already consumed by the caller.
        is_rollback = self._prev.token_type == TokenType.ROLLBACK

        self._match_texts({"TRANSACTION", "WORK"})

        if self._match_text_seq("TO"):
            self._match_text_seq("SAVEPOINT")
            savepoint = self._parse_id_var()

        if self._match(TokenType.AND):
            chain = not self._match_text_seq("NO")
            self._match_text_seq("CHAIN")

        if is_rollback:
            return self.expression(exp.Rollback, savepoint=savepoint)

        return self.expression(exp.Commit, chain=chain)

    def _parse_add_column(self) -> t.Optional[exp.Expression]:
        """Parse ALTER TABLE ... ADD [COLUMN] [IF NOT EXISTS] <column def> [FIRST|AFTER col]."""
        if not self._match_text_seq("ADD"):
            return None

        self._match(TokenType.COLUMN)
        exists_column = self._parse_exists(not_=True)
        expression = self._parse_column_def(self._parse_field(any_token=True))

        if expression:
            expression.set("exists", exists_column)

            # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns
            if self._match_texts(("FIRST", "AFTER")):
                position = self._prev.text
                column_position = self.expression(
                    exp.ColumnPosition, this=self._parse_column(), position=position
                )
                expression.set("position", column_position)

        return expression

    def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]:
        """Parse ALTER TABLE ... DROP [COLUMN] via the generic DROP parser."""
        drop = self._match(TokenType.DROP) and self._parse_drop()
        if drop and not isinstance(drop, exp.Command):
            drop.set("kind", drop.args.get("kind", "COLUMN"))
        return drop

    # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html
    def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition:
        """Parse ALTER TABLE ... DROP [IF EXISTS] PARTITION (...), PARTITION (...)."""
        return self.expression(
            exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists
        )

    def _parse_add_constraint(self) -> exp.AddConstraint:
        """Parse an ADD CONSTRAINT action (CHECK, FOREIGN KEY or PRIMARY KEY)."""
        this = None
        # The caller already consumed the trigger token (e.g. CONSTRAINT).
        kind = self._prev.token_type

        if kind == TokenType.CONSTRAINT:
            this = self._parse_id_var()

            if self._match_text_seq("CHECK"):
                expression = self._parse_wrapped(self._parse_conjunction)
                enforced = self._match_text_seq("ENFORCED")

                return self.expression(
                    exp.AddConstraint, this=this, expression=expression, enforced=enforced
                )

        if kind == TokenType.FOREIGN_KEY or self._match(TokenType.FOREIGN_KEY):
            expression = self._parse_foreign_key()
        elif kind == TokenType.PRIMARY_KEY or self._match(TokenType.PRIMARY_KEY):
            expression = self._parse_primary_key()
        else:
            expression = None

        return self.expression(exp.AddConstraint, this=this, expression=expression)

    def _parse_alter_table_add(self) -> t.List[t.Optional[exp.Expression]]:
        """Parse ALTER TABLE ADD: constraints if a constraint token follows, else columns."""
        index = self._index - 1

        if self._match_set(self.ADD_CONSTRAINT_TOKENS):
            return self._parse_csv(self._parse_add_constraint)

        self._retreat(index)
        return self._parse_csv(self._parse_add_column)

    def _parse_alter_table_alter(self) -> exp.AlterColumn:
        self._match(TokenType.COLUMN)
        column = self._parse_field(any_token=True)

        if self._match_pair(TokenType.DROP, TokenType.DEFAULT):
            return self.expression(exp.AlterColumn, this=column, drop=True)
        if self._match_pair(TokenType.SET, TokenType.DEFAULT):
            return self.expression(exp.AlterColumn, this=column, default=self._parse_conjunction())

        # Remaining form: ALTER COLUMN ... [SET DATA] TYPE <type> [COLLATE ...] [USING ...]
        self._match_text_seq("SET", "DATA")
        return self.expression(
            exp.AlterColumn,
            this=column,
            dtype=self._match_text_seq("TYPE") and self._parse_types(),
            collate=self._match(TokenType.COLLATE) and self._parse_term(),
            using=self._match(TokenType.USING) and self._parse_conjunction(),
        )

    def _parse_alter_table_drop(self) -> t.List[t.Optional[exp.Expression]]:
        """Parse ALTER TABLE DROP: partitions if PARTITION follows, else columns."""
        index = self._index - 1

        partition_exists = self._parse_exists()
        if self._match(TokenType.PARTITION, advance=False):
            return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists))

        self._retreat(index)
        return self._parse_csv(self._parse_drop_column)

    def _parse_alter_table_rename(self) -> exp.RenameTable:
        """Parse ALTER TABLE ... RENAME TO <table>."""
        self._match_text_seq("TO")
        return self.expression(exp.RenameTable, this=self._parse_table(schema=True))

    def _parse_alter(self) -> exp.AlterTable | exp.Command:
        """Parse an ALTER TABLE statement; unknown forms fall back to a raw Command."""
        start = self._prev

        if not self._match(TokenType.TABLE):
            return self._parse_as_command(start)

        exists = self._parse_exists()
        this = self._parse_table(schema=True)

        if self._next:
            self._advance()
        parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None

        if parser:
            actions = ensure_list(parser(self))

            # Only emit a structured AlterTable when all tokens were consumed.
            if not self._curr:
                return self.expression(
                    exp.AlterTable,
                    this=this,
                    exists=exists,
                    actions=actions,
                )
        return self._parse_as_command(start)

    def _parse_merge(self) -> exp.Merge:
        """Parse MERGE INTO <target> USING <source> ON <cond> WHEN ... THEN ... clauses."""
        self._match(TokenType.INTO)
        target = self._parse_table()

        self._match(TokenType.USING)
        using = self._parse_table()

        self._match(TokenType.ON)
        on = self._parse_conjunction()

        whens = []
        while self._match(TokenType.WHEN):
            matched = not self._match(TokenType.NOT)
            self._match_text_seq("MATCHED")
            # BY TARGET / BY SOURCE distinguishes the NOT MATCHED variants.
            source = (
                False
                if self._match_text_seq("BY", "TARGET")
                else self._match_text_seq("BY", "SOURCE")
            )
            condition = self._parse_conjunction() if self._match(TokenType.AND) else None

            self._match(TokenType.THEN)

            if self._match(TokenType.INSERT):
                _this = self._parse_star()
                if _this:
                    then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=_this)
                else:
                    then = self.expression(
                        exp.Insert,
                        this=self._parse_value(),
                        expression=self._match(TokenType.VALUES) and self._parse_value(),
                    )
            elif self._match(TokenType.UPDATE):
                expressions = self._parse_star()
                if expressions:
                    then = self.expression(exp.Update, expressions=expressions)
                else:
                    then = self.expression(
                        exp.Update,
                        expressions=self._match(TokenType.SET)
                        and self._parse_csv(self._parse_equality),
                    )
            elif self._match(TokenType.DELETE):
                then = self.expression(exp.Var, this=self._prev.text)
            else:
                then = None

            whens.append(
                self.expression(
                    exp.When,
                    matched=matched,
                    source=source,
                    condition=condition,
                    then=then,
                )
            )

        return self.expression(
            exp.Merge,
            this=target,
            using=using,
            on=on,
            expressions=whens,
        )

    def _parse_show(self) -> t.Optional[exp.Expression]:
        """Parse a SHOW statement via SHOW_PARSERS, falling back to a generic Show."""
        parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE)
        if parser:
            return parser(self)
        self._advance()
        return self.expression(exp.Show, this=self._prev.text.upper())

    def _parse_set_item_assignment(
        self, kind: t.Optional[str] = None
    ) -> t.Optional[exp.Expression]:
        """Parse one SET assignment (`name = value` or `name TO value`).

        Args:
            kind: Optional scope qualifier (e.g. GLOBAL / SESSION).
        """
        index = self._index

        if kind in {"GLOBAL", "SESSION"} and self._match_text_seq("TRANSACTION"):
            return self._parse_set_transaction(global_=kind == "GLOBAL")

        left = self._parse_primary() or self._parse_id_var()

        if not self._match_texts(("=", "TO")):
            # Not an assignment: rewind so the caller can try something else.
            self._retreat(index)
            return None

        right = self._parse_statement() or self._parse_id_var()
        this = self.expression(exp.EQ, this=left, expression=right)

        return self.expression(exp.SetItem, this=this, kind=kind)

    def _parse_set_transaction(self, global_: bool = False) -> exp.Expression:
        """Parse SET [GLOBAL|SESSION] TRANSACTION <characteristic, ...>."""
        self._match_text_seq("TRANSACTION")
        characteristics = self._parse_csv(
            lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS)
        )
        return self.expression(
            exp.SetItem,
            expressions=characteristics,
            kind="TRANSACTION",
            **{"global": global_},  # type: ignore
        )

    def _parse_set_item(self) -> t.Optional[exp.Expression]:
        """Parse one SET item via SET_PARSERS, defaulting to a plain assignment."""
        parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE)
        return parser(self) if parser else self._parse_set_item_assignment(kind=None)

    def _parse_set(self) -> exp.Set | exp.Command:
        """Parse a SET statement; leftover tokens downgrade it to a raw Command."""
        index = self._index
        set_ = self.expression(exp.Set, expressions=self._parse_csv(self._parse_set_item))

        if self._curr:
            self._retreat(index)
            return self._parse_as_command(self._prev)

        return set_

    def _parse_var_from_options(self, options: t.Collection[str]) -> t.Optional[exp.Var]:
        """Return a Var for the first multi-word option that matches the token stream."""
        for option in options:
            if self._match_text_seq(*option.split(" ")):
                return exp.var(option)
        return None

    def _parse_as_command(self, start: Token) -> exp.Command:
        """Consume the remaining tokens and wrap them as an opaque Command."""
        while self._curr:
            self._advance()
        text = self._find_sql(start, self._prev)
        # Split the keyword itself from the rest of the raw SQL.
        size = len(start.text)
        return exp.Command(this=text[:size], expression=text[size:])

    def _parse_dict_property(self, this: str) -> exp.DictProperty:
        """Parse a ClickHouse dictionary property: <name>(<kind>[(<key value>...)])."""
        settings = []

        self._match_l_paren()
        kind = self._parse_id_var()

        if self._match(TokenType.L_PAREN):
            while True:
                key = self._parse_id_var()
                value = self._parse_primary()

                if not key and value is None:
                    break
                settings.append(self.expression(exp.DictSubProperty, this=key, value=value))
            self._match(TokenType.R_PAREN)

        self._match_r_paren()

        return self.expression(
            exp.DictProperty,
            this=this,
            kind=kind.this if kind else None,
            settings=settings,
        )

    def _parse_dict_range(self, this: str) -> exp.DictRange:
        """Parse a dictionary RANGE(MIN <x> MAX <y>) clause; MIN defaults to 0."""
        self._match_l_paren()
        has_min = self._match_text_seq("MIN")
        if has_min:
            # NOTE: these locals intentionally mirror the SQL keywords, shadowing builtins.
            min = self._parse_var() or self._parse_primary()
            self._match_text_seq("MAX")
            max = self._parse_var() or self._parse_primary()
        else:
            max = self._parse_var() or self._parse_primary()
            min = exp.Literal.number(0)
        self._match_r_paren()
        return self.expression(exp.DictRange, this=this, min=min, max=max)

    def _find_parser(
        self, parsers: t.Dict[str, t.Callable], trie: t.Dict
    ) -> t.Optional[t.Callable]:
        """Find the longest keyword-sequence parser in `parsers` using `trie`;
        rewinds and returns None when no full match exists."""
        if not self._curr:
            return None

        index = self._index
        this = []
        while True:
            # The current token might be multiple words
            curr = self._curr.text.upper()
            key = curr.split(" ")
            this.append(curr)
            self._advance()
            result, trie = in_trie(trie, key)
            if result == 0:
                break
            if result == 2:
                subparser = parsers[" ".join(this)]
                return subparser
        self._retreat(index)
        return None

    def _match(
        self,
        token_type: TokenType,
        advance: bool = True,
        expression: t.Optional[exp.Expression] = None,
    ) -> t.Optional[bool]:
        """Return True (advancing unless advance=False) if the current token matches.

        When a match advances, any pending comments are attached to `expression`.
        """
        if not self._curr:
            return None

        if self._curr.token_type == token_type:
            if advance:
                self._advance()
            self._add_comments(expression)
            return True

        return None

    def _match_set(
        self, types: t.Collection[TokenType], advance: bool = True
    ) -> t.Optional[bool]:
        """Return True (advancing unless advance=False) if the current token is in `types`."""
        if not self._curr:
            return None

        if self._curr.token_type in types:
            if advance:
                self._advance()
            return True

        return None

    def _match_pair(
        self, token_type_a: TokenType, token_type_b: TokenType, advance: bool = True
    ) -> t.Optional[bool]:
        """Return True (advancing past both unless advance=False) if the next two
        tokens match the given pair."""
        if not self._curr or not self._next:
            return None

        if self._curr.token_type == token_type_a and self._next.token_type == token_type_b:
            if advance:
                self._advance(2)
            return True

        return None

    def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None:
        """Require and consume a "(", raising a ParseError otherwise."""
        if not self._match(TokenType.L_PAREN, expression=expression):
            self.raise_error("Expecting (")

    def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None:
        """Require and consume a ")", raising a ParseError otherwise."""
        if not self._match(TokenType.R_PAREN, expression=expression):
            self.raise_error("Expecting )")

    def _match_texts(self, texts: t.Collection[str], advance: bool = True) -> bool:
        """Return True (advancing unless advance=False) if the current token's
        upper-cased text is in `texts`."""
        if self._curr and self._curr.text.upper() in texts:
            if advance:
                self._advance()
            return True
        return False

    def _match_text_seq(self, *texts: str, advance: bool = True) -> bool:
        """Return True if the upcoming tokens spell out `texts` (case-insensitive),
        consuming them unless advance=False; rewinds fully on any mismatch."""
        index = self._index
        for text in texts:
            if self._curr and self._curr.text.upper() == text:
                self._advance()
            else:
                self._retreat(index)
                return False

        if not advance:
            self._retreat(index)

        return True

    @t.overload
    def _replace_columns_with_dots(self, this: exp.Expression) -> exp.Expression:
        ...

    @t.overload
    def _replace_columns_with_dots(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        ...
4618 4619 def _replace_columns_with_dots(self, this): 4620 if isinstance(this, exp.Dot): 4621 exp.replace_children(this, self._replace_columns_with_dots) 4622 elif isinstance(this, exp.Column): 4623 exp.replace_children(this, self._replace_columns_with_dots) 4624 table = this.args.get("table") 4625 this = ( 4626 self.expression(exp.Dot, this=table, expression=this.this) 4627 if table 4628 else self.expression(exp.Var, this=this.name) 4629 ) 4630 elif isinstance(this, exp.Identifier): 4631 this = self.expression(exp.Var, this=this.name) 4632 4633 return this 4634 4635 def _replace_lambda( 4636 self, node: t.Optional[exp.Expression], lambda_variables: t.Set[str] 4637 ) -> t.Optional[exp.Expression]: 4638 if not node: 4639 return node 4640 4641 for column in node.find_all(exp.Column): 4642 if column.parts[0].name in lambda_variables: 4643 dot_or_id = column.to_dot() if column.table else column.this 4644 parent = column.parent 4645 4646 while isinstance(parent, exp.Dot): 4647 if not isinstance(parent.parent, exp.Dot): 4648 parent.replace(dot_or_id) 4649 break 4650 parent = parent.parent 4651 else: 4652 if column is node: 4653 node = dot_or_id 4654 else: 4655 column.replace(dot_or_id) 4656 return node
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: Determines the amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
822 def __init__( 823 self, 824 error_level: t.Optional[ErrorLevel] = None, 825 error_message_context: int = 100, 826 max_errors: int = 3, 827 ): 828 self.error_level = error_level or ErrorLevel.IMMEDIATE 829 self.error_message_context = error_message_context 830 self.max_errors = max_errors 831 self.reset()
843 def parse( 844 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 845 ) -> t.List[t.Optional[exp.Expression]]: 846 """ 847 Parses a list of tokens and returns a list of syntax trees, one tree 848 per parsed SQL statement. 849 850 Args: 851 raw_tokens: The list of tokens. 852 sql: The original SQL string, used to produce helpful debug messages. 853 854 Returns: 855 The list of the produced syntax trees. 856 """ 857 return self._parse( 858 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 859 )
Parses a list of tokens and returns a list of syntax trees, one tree per parsed SQL statement.
Arguments:
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The list of the produced syntax trees.
861 def parse_into( 862 self, 863 expression_types: exp.IntoType, 864 raw_tokens: t.List[Token], 865 sql: t.Optional[str] = None, 866 ) -> t.List[t.Optional[exp.Expression]]: 867 """ 868 Parses a list of tokens into a given Expression type. If a collection of Expression 869 types is given instead, this method will try to parse the token list into each one 870 of them, stopping at the first for which the parsing succeeds. 871 872 Args: 873 expression_types: The expression type(s) to try and parse the token list into. 874 raw_tokens: The list of tokens. 875 sql: The original SQL string, used to produce helpful debug messages. 876 877 Returns: 878 The target Expression. 879 """ 880 errors = [] 881 for expression_type in ensure_list(expression_types): 882 parser = self.EXPRESSION_PARSERS.get(expression_type) 883 if not parser: 884 raise TypeError(f"No parser registered for {expression_type}") 885 886 try: 887 return self._parse(parser, raw_tokens, sql) 888 except ParseError as e: 889 e.errors[0]["into_expression"] = expression_type 890 errors.append(e) 891 892 raise ParseError( 893 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 894 errors=merge_errors(errors), 895 ) from errors[-1]
Parses a list of tokens into a given Expression type. If a collection of Expression types is given instead, this method will try to parse the token list into each one of them, stopping at the first for which the parsing succeeds.
Arguments:
- expression_types: The expression type(s) to try and parse the token list into.
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The target Expression.
932 def check_errors(self) -> None: 933 """Logs or raises any found errors, depending on the chosen error level setting.""" 934 if self.error_level == ErrorLevel.WARN: 935 for error in self.errors: 936 logger.error(str(error)) 937 elif self.error_level == ErrorLevel.RAISE and self.errors: 938 raise ParseError( 939 concat_messages(self.errors, self.max_errors), 940 errors=merge_errors(self.errors), 941 )
Logs or raises any found errors, depending on the chosen error level setting.
943 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 944 """ 945 Appends an error in the list of recorded errors or raises it, depending on the chosen 946 error level setting. 947 """ 948 token = token or self._curr or self._prev or Token.string("") 949 start = token.start 950 end = token.end + 1 951 start_context = self.sql[max(start - self.error_message_context, 0) : start] 952 highlight = self.sql[start:end] 953 end_context = self.sql[end : end + self.error_message_context] 954 955 error = ParseError.new( 956 f"{message}. Line {token.line}, Col: {token.col}.\n" 957 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 958 description=message, 959 line=token.line, 960 col=token.col, 961 start_context=start_context, 962 highlight=highlight, 963 end_context=end_context, 964 ) 965 966 if self.error_level == ErrorLevel.IMMEDIATE: 967 raise error 968 969 self.errors.append(error)
Appends an error in the list of recorded errors or raises it, depending on the chosen error level setting.
971 def expression( 972 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 973 ) -> E: 974 """ 975 Creates a new, validated Expression. 976 977 Args: 978 exp_class: The expression class to instantiate. 979 comments: An optional list of comments to attach to the expression. 980 kwargs: The arguments to set for the expression along with their respective values. 981 982 Returns: 983 The target expression. 984 """ 985 instance = exp_class(**kwargs) 986 instance.add_comments(comments) if comments else self._add_comments(instance) 987 return self.validate_expression(instance)
Creates a new, validated Expression.
Arguments:
- exp_class: The expression class to instantiate.
- comments: An optional list of comments to attach to the expression.
- kwargs: The arguments to set for the expression along with their respective values.
Returns:
The target expression.
994 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 995 """ 996 Validates an Expression, making sure that all its mandatory arguments are set. 997 998 Args: 999 expression: The expression to validate. 1000 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1001 1002 Returns: 1003 The validated expression. 1004 """ 1005 if self.error_level != ErrorLevel.IGNORE: 1006 for error_message in expression.error_messages(args): 1007 self.raise_error(error_message) 1008 1009 return expression
Validates an Expression, making sure that all its mandatory arguments are set.
Arguments:
- expression: The expression to validate.
- args: An optional list of items that was used to instantiate the expression, if it's a Func.
Returns:
The validated expression.