sqlglot.parser
1from __future__ import annotations 2 3import logging 4import typing as t 5from collections import defaultdict 6 7from sqlglot import exp 8from sqlglot.errors import ErrorLevel, ParseError, concat_messages, merge_errors 9from sqlglot.helper import apply_index_offset, ensure_list, seq_get 10from sqlglot.time import format_time 11from sqlglot.tokens import Token, Tokenizer, TokenType 12from sqlglot.trie import TrieResult, in_trie, new_trie 13 14if t.TYPE_CHECKING: 15 from sqlglot._typing import E 16 17logger = logging.getLogger("sqlglot") 18 19 20def parse_var_map(args: t.List) -> exp.StarMap | exp.VarMap: 21 if len(args) == 1 and args[0].is_star: 22 return exp.StarMap(this=args[0]) 23 24 keys = [] 25 values = [] 26 for i in range(0, len(args), 2): 27 keys.append(args[i]) 28 values.append(args[i + 1]) 29 30 return exp.VarMap( 31 keys=exp.Array(expressions=keys), 32 values=exp.Array(expressions=values), 33 ) 34 35 36def parse_like(args: t.List) -> exp.Escape | exp.Like: 37 like = exp.Like(this=seq_get(args, 1), expression=seq_get(args, 0)) 38 return exp.Escape(this=like, expression=seq_get(args, 2)) if len(args) > 2 else like 39 40 41def binary_range_parser( 42 expr_type: t.Type[exp.Expression], 43) -> t.Callable[[Parser, t.Optional[exp.Expression]], t.Optional[exp.Expression]]: 44 return lambda self, this: self._parse_escape( 45 self.expression(expr_type, this=this, expression=self._parse_bitwise()) 46 ) 47 48 49class _Parser(type): 50 def __new__(cls, clsname, bases, attrs): 51 klass = super().__new__(cls, clsname, bases, attrs) 52 53 klass.SHOW_TRIE = new_trie(key.split(" ") for key in klass.SHOW_PARSERS) 54 klass.SET_TRIE = new_trie(key.split(" ") for key in klass.SET_PARSERS) 55 56 return klass 57 58 59class Parser(metaclass=_Parser): 60 """ 61 Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree. 62 63 Args: 64 error_level: The desired error level. 
65 Default: ErrorLevel.IMMEDIATE 66 error_message_context: Determines the amount of context to capture from a 67 query string when displaying the error message (in number of characters). 68 Default: 100 69 max_errors: Maximum number of error messages to include in a raised ParseError. 70 This is only relevant if error_level is ErrorLevel.RAISE. 71 Default: 3 72 """ 73 74 FUNCTIONS: t.Dict[str, t.Callable] = { 75 **{name: f.from_arg_list for f in exp.ALL_FUNCTIONS for name in f.sql_names()}, 76 "DATE_TO_DATE_STR": lambda args: exp.Cast( 77 this=seq_get(args, 0), 78 to=exp.DataType(this=exp.DataType.Type.TEXT), 79 ), 80 "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)), 81 "LIKE": parse_like, 82 "TIME_TO_TIME_STR": lambda args: exp.Cast( 83 this=seq_get(args, 0), 84 to=exp.DataType(this=exp.DataType.Type.TEXT), 85 ), 86 "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring( 87 this=exp.Cast( 88 this=seq_get(args, 0), 89 to=exp.DataType(this=exp.DataType.Type.TEXT), 90 ), 91 start=exp.Literal.number(1), 92 length=exp.Literal.number(10), 93 ), 94 "VAR_MAP": parse_var_map, 95 } 96 97 NO_PAREN_FUNCTIONS = { 98 TokenType.CURRENT_DATE: exp.CurrentDate, 99 TokenType.CURRENT_DATETIME: exp.CurrentDate, 100 TokenType.CURRENT_TIME: exp.CurrentTime, 101 TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp, 102 TokenType.CURRENT_USER: exp.CurrentUser, 103 } 104 105 NESTED_TYPE_TOKENS = { 106 TokenType.ARRAY, 107 TokenType.MAP, 108 TokenType.NULLABLE, 109 TokenType.STRUCT, 110 } 111 112 ENUM_TYPE_TOKENS = { 113 TokenType.ENUM, 114 } 115 116 TYPE_TOKENS = { 117 TokenType.BIT, 118 TokenType.BOOLEAN, 119 TokenType.TINYINT, 120 TokenType.UTINYINT, 121 TokenType.SMALLINT, 122 TokenType.USMALLINT, 123 TokenType.INT, 124 TokenType.UINT, 125 TokenType.BIGINT, 126 TokenType.UBIGINT, 127 TokenType.INT128, 128 TokenType.UINT128, 129 TokenType.INT256, 130 TokenType.UINT256, 131 TokenType.FLOAT, 132 TokenType.DOUBLE, 133 TokenType.CHAR, 134 TokenType.NCHAR, 135 
TokenType.VARCHAR, 136 TokenType.NVARCHAR, 137 TokenType.TEXT, 138 TokenType.MEDIUMTEXT, 139 TokenType.LONGTEXT, 140 TokenType.MEDIUMBLOB, 141 TokenType.LONGBLOB, 142 TokenType.BINARY, 143 TokenType.VARBINARY, 144 TokenType.JSON, 145 TokenType.JSONB, 146 TokenType.INTERVAL, 147 TokenType.TIME, 148 TokenType.TIMESTAMP, 149 TokenType.TIMESTAMPTZ, 150 TokenType.TIMESTAMPLTZ, 151 TokenType.DATETIME, 152 TokenType.DATETIME64, 153 TokenType.DATE, 154 TokenType.INT4RANGE, 155 TokenType.INT4MULTIRANGE, 156 TokenType.INT8RANGE, 157 TokenType.INT8MULTIRANGE, 158 TokenType.NUMRANGE, 159 TokenType.NUMMULTIRANGE, 160 TokenType.TSRANGE, 161 TokenType.TSMULTIRANGE, 162 TokenType.TSTZRANGE, 163 TokenType.TSTZMULTIRANGE, 164 TokenType.DATERANGE, 165 TokenType.DATEMULTIRANGE, 166 TokenType.DECIMAL, 167 TokenType.BIGDECIMAL, 168 TokenType.UUID, 169 TokenType.GEOGRAPHY, 170 TokenType.GEOMETRY, 171 TokenType.HLLSKETCH, 172 TokenType.HSTORE, 173 TokenType.PSEUDO_TYPE, 174 TokenType.SUPER, 175 TokenType.SERIAL, 176 TokenType.SMALLSERIAL, 177 TokenType.BIGSERIAL, 178 TokenType.XML, 179 TokenType.UNIQUEIDENTIFIER, 180 TokenType.USERDEFINED, 181 TokenType.MONEY, 182 TokenType.SMALLMONEY, 183 TokenType.ROWVERSION, 184 TokenType.IMAGE, 185 TokenType.VARIANT, 186 TokenType.OBJECT, 187 TokenType.INET, 188 TokenType.ENUM, 189 *NESTED_TYPE_TOKENS, 190 } 191 192 SUBQUERY_PREDICATES = { 193 TokenType.ANY: exp.Any, 194 TokenType.ALL: exp.All, 195 TokenType.EXISTS: exp.Exists, 196 TokenType.SOME: exp.Any, 197 } 198 199 RESERVED_KEYWORDS = { 200 *Tokenizer.SINGLE_TOKENS.values(), 201 TokenType.SELECT, 202 } 203 204 DB_CREATABLES = { 205 TokenType.DATABASE, 206 TokenType.SCHEMA, 207 TokenType.TABLE, 208 TokenType.VIEW, 209 TokenType.DICTIONARY, 210 } 211 212 CREATABLES = { 213 TokenType.COLUMN, 214 TokenType.FUNCTION, 215 TokenType.INDEX, 216 TokenType.PROCEDURE, 217 *DB_CREATABLES, 218 } 219 220 # Tokens that can represent identifiers 221 ID_VAR_TOKENS = { 222 TokenType.VAR, 223 TokenType.ANTI, 224 
TokenType.APPLY, 225 TokenType.ASC, 226 TokenType.AUTO_INCREMENT, 227 TokenType.BEGIN, 228 TokenType.CACHE, 229 TokenType.CASE, 230 TokenType.COLLATE, 231 TokenType.COMMAND, 232 TokenType.COMMENT, 233 TokenType.COMMIT, 234 TokenType.CONSTRAINT, 235 TokenType.DEFAULT, 236 TokenType.DELETE, 237 TokenType.DESC, 238 TokenType.DESCRIBE, 239 TokenType.DICTIONARY, 240 TokenType.DIV, 241 TokenType.END, 242 TokenType.EXECUTE, 243 TokenType.ESCAPE, 244 TokenType.FALSE, 245 TokenType.FIRST, 246 TokenType.FILTER, 247 TokenType.FORMAT, 248 TokenType.FULL, 249 TokenType.IF, 250 TokenType.IS, 251 TokenType.ISNULL, 252 TokenType.INTERVAL, 253 TokenType.KEEP, 254 TokenType.LEFT, 255 TokenType.LOAD, 256 TokenType.MERGE, 257 TokenType.NATURAL, 258 TokenType.NEXT, 259 TokenType.OFFSET, 260 TokenType.ORDINALITY, 261 TokenType.OVERWRITE, 262 TokenType.PARTITION, 263 TokenType.PERCENT, 264 TokenType.PIVOT, 265 TokenType.PRAGMA, 266 TokenType.RANGE, 267 TokenType.REFERENCES, 268 TokenType.RIGHT, 269 TokenType.ROW, 270 TokenType.ROWS, 271 TokenType.SEMI, 272 TokenType.SET, 273 TokenType.SETTINGS, 274 TokenType.SHOW, 275 TokenType.TEMPORARY, 276 TokenType.TOP, 277 TokenType.TRUE, 278 TokenType.UNIQUE, 279 TokenType.UNPIVOT, 280 TokenType.UPDATE, 281 TokenType.VOLATILE, 282 TokenType.WINDOW, 283 *CREATABLES, 284 *SUBQUERY_PREDICATES, 285 *TYPE_TOKENS, 286 *NO_PAREN_FUNCTIONS, 287 } 288 289 INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END} 290 291 TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - { 292 TokenType.APPLY, 293 TokenType.ASOF, 294 TokenType.FULL, 295 TokenType.LEFT, 296 TokenType.LOCK, 297 TokenType.NATURAL, 298 TokenType.OFFSET, 299 TokenType.RIGHT, 300 TokenType.WINDOW, 301 } 302 303 COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS} 304 305 UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET} 306 307 TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"} 308 309 FUNC_TOKENS = { 310 TokenType.COMMAND, 311 TokenType.CURRENT_DATE, 312 TokenType.CURRENT_DATETIME, 313 
TokenType.CURRENT_TIMESTAMP, 314 TokenType.CURRENT_TIME, 315 TokenType.CURRENT_USER, 316 TokenType.FILTER, 317 TokenType.FIRST, 318 TokenType.FORMAT, 319 TokenType.GLOB, 320 TokenType.IDENTIFIER, 321 TokenType.INDEX, 322 TokenType.ISNULL, 323 TokenType.ILIKE, 324 TokenType.LIKE, 325 TokenType.MERGE, 326 TokenType.OFFSET, 327 TokenType.PRIMARY_KEY, 328 TokenType.RANGE, 329 TokenType.REPLACE, 330 TokenType.ROW, 331 TokenType.UNNEST, 332 TokenType.VAR, 333 TokenType.LEFT, 334 TokenType.RIGHT, 335 TokenType.DATE, 336 TokenType.DATETIME, 337 TokenType.TABLE, 338 TokenType.TIMESTAMP, 339 TokenType.TIMESTAMPTZ, 340 TokenType.WINDOW, 341 *TYPE_TOKENS, 342 *SUBQUERY_PREDICATES, 343 } 344 345 CONJUNCTION = { 346 TokenType.AND: exp.And, 347 TokenType.OR: exp.Or, 348 } 349 350 EQUALITY = { 351 TokenType.EQ: exp.EQ, 352 TokenType.NEQ: exp.NEQ, 353 TokenType.NULLSAFE_EQ: exp.NullSafeEQ, 354 } 355 356 COMPARISON = { 357 TokenType.GT: exp.GT, 358 TokenType.GTE: exp.GTE, 359 TokenType.LT: exp.LT, 360 TokenType.LTE: exp.LTE, 361 } 362 363 BITWISE = { 364 TokenType.AMP: exp.BitwiseAnd, 365 TokenType.CARET: exp.BitwiseXor, 366 TokenType.PIPE: exp.BitwiseOr, 367 TokenType.DPIPE: exp.DPipe, 368 } 369 370 TERM = { 371 TokenType.DASH: exp.Sub, 372 TokenType.PLUS: exp.Add, 373 TokenType.MOD: exp.Mod, 374 TokenType.COLLATE: exp.Collate, 375 } 376 377 FACTOR = { 378 TokenType.DIV: exp.IntDiv, 379 TokenType.LR_ARROW: exp.Distance, 380 TokenType.SLASH: exp.Div, 381 TokenType.STAR: exp.Mul, 382 } 383 384 TIMESTAMPS = { 385 TokenType.TIME, 386 TokenType.TIMESTAMP, 387 TokenType.TIMESTAMPTZ, 388 TokenType.TIMESTAMPLTZ, 389 } 390 391 SET_OPERATIONS = { 392 TokenType.UNION, 393 TokenType.INTERSECT, 394 TokenType.EXCEPT, 395 } 396 397 JOIN_METHODS = { 398 TokenType.NATURAL, 399 TokenType.ASOF, 400 } 401 402 JOIN_SIDES = { 403 TokenType.LEFT, 404 TokenType.RIGHT, 405 TokenType.FULL, 406 } 407 408 JOIN_KINDS = { 409 TokenType.INNER, 410 TokenType.OUTER, 411 TokenType.CROSS, 412 TokenType.SEMI, 413 
TokenType.ANTI, 414 } 415 416 JOIN_HINTS: t.Set[str] = set() 417 418 LAMBDAS = { 419 TokenType.ARROW: lambda self, expressions: self.expression( 420 exp.Lambda, 421 this=self._replace_lambda( 422 self._parse_conjunction(), 423 {node.name for node in expressions}, 424 ), 425 expressions=expressions, 426 ), 427 TokenType.FARROW: lambda self, expressions: self.expression( 428 exp.Kwarg, 429 this=exp.var(expressions[0].name), 430 expression=self._parse_conjunction(), 431 ), 432 } 433 434 COLUMN_OPERATORS = { 435 TokenType.DOT: None, 436 TokenType.DCOLON: lambda self, this, to: self.expression( 437 exp.Cast if self.STRICT_CAST else exp.TryCast, 438 this=this, 439 to=to, 440 ), 441 TokenType.ARROW: lambda self, this, path: self.expression( 442 exp.JSONExtract, 443 this=this, 444 expression=path, 445 ), 446 TokenType.DARROW: lambda self, this, path: self.expression( 447 exp.JSONExtractScalar, 448 this=this, 449 expression=path, 450 ), 451 TokenType.HASH_ARROW: lambda self, this, path: self.expression( 452 exp.JSONBExtract, 453 this=this, 454 expression=path, 455 ), 456 TokenType.DHASH_ARROW: lambda self, this, path: self.expression( 457 exp.JSONBExtractScalar, 458 this=this, 459 expression=path, 460 ), 461 TokenType.PLACEHOLDER: lambda self, this, key: self.expression( 462 exp.JSONBContains, 463 this=this, 464 expression=key, 465 ), 466 } 467 468 EXPRESSION_PARSERS = { 469 exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 470 exp.Column: lambda self: self._parse_column(), 471 exp.Condition: lambda self: self._parse_conjunction(), 472 exp.DataType: lambda self: self._parse_types(), 473 exp.Expression: lambda self: self._parse_statement(), 474 exp.From: lambda self: self._parse_from(), 475 exp.Group: lambda self: self._parse_group(), 476 exp.Having: lambda self: self._parse_having(), 477 exp.Identifier: lambda self: self._parse_id_var(), 478 exp.Join: lambda self: self._parse_join(), 479 exp.Lambda: lambda self: self._parse_lambda(), 480 
exp.Lateral: lambda self: self._parse_lateral(), 481 exp.Limit: lambda self: self._parse_limit(), 482 exp.Offset: lambda self: self._parse_offset(), 483 exp.Order: lambda self: self._parse_order(), 484 exp.Ordered: lambda self: self._parse_ordered(), 485 exp.Properties: lambda self: self._parse_properties(), 486 exp.Qualify: lambda self: self._parse_qualify(), 487 exp.Returning: lambda self: self._parse_returning(), 488 exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY), 489 exp.Table: lambda self: self._parse_table_parts(), 490 exp.TableAlias: lambda self: self._parse_table_alias(), 491 exp.Where: lambda self: self._parse_where(), 492 exp.Window: lambda self: self._parse_named_window(), 493 exp.With: lambda self: self._parse_with(), 494 "JOIN_TYPE": lambda self: self._parse_join_parts(), 495 } 496 497 STATEMENT_PARSERS = { 498 TokenType.ALTER: lambda self: self._parse_alter(), 499 TokenType.BEGIN: lambda self: self._parse_transaction(), 500 TokenType.CACHE: lambda self: self._parse_cache(), 501 TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(), 502 TokenType.COMMENT: lambda self: self._parse_comment(), 503 TokenType.CREATE: lambda self: self._parse_create(), 504 TokenType.DELETE: lambda self: self._parse_delete(), 505 TokenType.DESC: lambda self: self._parse_describe(), 506 TokenType.DESCRIBE: lambda self: self._parse_describe(), 507 TokenType.DROP: lambda self: self._parse_drop(), 508 TokenType.END: lambda self: self._parse_commit_or_rollback(), 509 TokenType.FROM: lambda self: exp.select("*").from_( 510 t.cast(exp.From, self._parse_from(skip_from_token=True)) 511 ), 512 TokenType.INSERT: lambda self: self._parse_insert(), 513 TokenType.LOAD: lambda self: self._parse_load(), 514 TokenType.MERGE: lambda self: self._parse_merge(), 515 TokenType.PIVOT: lambda self: self._parse_simplified_pivot(), 516 TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()), 517 TokenType.ROLLBACK: lambda self: 
self._parse_commit_or_rollback(), 518 TokenType.SET: lambda self: self._parse_set(), 519 TokenType.UNCACHE: lambda self: self._parse_uncache(), 520 TokenType.UPDATE: lambda self: self._parse_update(), 521 TokenType.USE: lambda self: self.expression( 522 exp.Use, 523 kind=self._match_texts(("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA")) 524 and exp.var(self._prev.text), 525 this=self._parse_table(schema=False), 526 ), 527 } 528 529 UNARY_PARSERS = { 530 TokenType.PLUS: lambda self: self._parse_unary(), # Unary + is handled as a no-op 531 TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()), 532 TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()), 533 TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()), 534 } 535 536 PRIMARY_PARSERS = { 537 TokenType.STRING: lambda self, token: self.expression( 538 exp.Literal, this=token.text, is_string=True 539 ), 540 TokenType.NUMBER: lambda self, token: self.expression( 541 exp.Literal, this=token.text, is_string=False 542 ), 543 TokenType.STAR: lambda self, _: self.expression( 544 exp.Star, **{"except": self._parse_except(), "replace": self._parse_replace()} 545 ), 546 TokenType.NULL: lambda self, _: self.expression(exp.Null), 547 TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True), 548 TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False), 549 TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text), 550 TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text), 551 TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text), 552 TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token), 553 TokenType.NATIONAL_STRING: lambda self, token: self.expression( 554 exp.National, this=token.text 555 ), 556 TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text), 557 
TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(), 558 } 559 560 PLACEHOLDER_PARSERS = { 561 TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder), 562 TokenType.PARAMETER: lambda self: self._parse_parameter(), 563 TokenType.COLON: lambda self: self.expression(exp.Placeholder, this=self._prev.text) 564 if self._match_set((TokenType.NUMBER, TokenType.VAR)) 565 else None, 566 } 567 568 RANGE_PARSERS = { 569 TokenType.BETWEEN: lambda self, this: self._parse_between(this), 570 TokenType.GLOB: binary_range_parser(exp.Glob), 571 TokenType.ILIKE: binary_range_parser(exp.ILike), 572 TokenType.IN: lambda self, this: self._parse_in(this), 573 TokenType.IRLIKE: binary_range_parser(exp.RegexpILike), 574 TokenType.IS: lambda self, this: self._parse_is(this), 575 TokenType.LIKE: binary_range_parser(exp.Like), 576 TokenType.OVERLAPS: binary_range_parser(exp.Overlaps), 577 TokenType.RLIKE: binary_range_parser(exp.RegexpLike), 578 TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo), 579 } 580 581 PROPERTY_PARSERS: t.Dict[str, t.Callable] = { 582 "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty), 583 "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty), 584 "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(), 585 "CHARACTER SET": lambda self: self._parse_character_set(), 586 "CHECKSUM": lambda self: self._parse_checksum(), 587 "CLUSTER BY": lambda self: self._parse_cluster(), 588 "CLUSTERED": lambda self: self._parse_clustered_by(), 589 "COLLATE": lambda self: self._parse_property_assignment(exp.CollateProperty), 590 "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty), 591 "COPY": lambda self: self._parse_copy_property(), 592 "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs), 593 "DEFINER": lambda self: self._parse_definer(), 594 "DETERMINISTIC": lambda self: self.expression( 595 exp.StabilityProperty, 
this=exp.Literal.string("IMMUTABLE") 596 ), 597 "DISTKEY": lambda self: self._parse_distkey(), 598 "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty), 599 "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty), 600 "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty), 601 "EXTERNAL": lambda self: self.expression(exp.ExternalProperty), 602 "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs), 603 "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 604 "FREESPACE": lambda self: self._parse_freespace(), 605 "IMMUTABLE": lambda self: self.expression( 606 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 607 ), 608 "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs), 609 "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty), 610 "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"), 611 "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"), 612 "LIKE": lambda self: self._parse_create_like(), 613 "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty), 614 "LOCK": lambda self: self._parse_locking(), 615 "LOCKING": lambda self: self._parse_locking(), 616 "LOG": lambda self, **kwargs: self._parse_log(**kwargs), 617 "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty), 618 "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs), 619 "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True), 620 "NO": lambda self: self._parse_no_property(), 621 "ON": lambda self: self._parse_on_property(), 622 "ORDER BY": lambda self: self._parse_order(skip_order_token=True), 623 "PARTITION BY": lambda self: self._parse_partitioned_by(), 624 "PARTITIONED BY": lambda self: self._parse_partitioned_by(), 625 "PARTITIONED_BY": lambda self: self._parse_partitioned_by(), 626 "PRIMARY KEY": lambda self: 
self._parse_primary_key(in_props=True), 627 "RANGE": lambda self: self._parse_dict_range(this="RANGE"), 628 "RETURNS": lambda self: self._parse_returns(), 629 "ROW": lambda self: self._parse_row(), 630 "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty), 631 "SET": lambda self: self.expression(exp.SetProperty, multi=False), 632 "SETTINGS": lambda self: self.expression( 633 exp.SettingsProperty, expressions=self._parse_csv(self._parse_set_item) 634 ), 635 "SORTKEY": lambda self: self._parse_sortkey(), 636 "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"), 637 "STABLE": lambda self: self.expression( 638 exp.StabilityProperty, this=exp.Literal.string("STABLE") 639 ), 640 "STORED": lambda self: self._parse_stored(), 641 "TBLPROPERTIES": lambda self: self._parse_wrapped_csv(self._parse_property), 642 "TEMP": lambda self: self.expression(exp.TemporaryProperty), 643 "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty), 644 "TO": lambda self: self._parse_to_table(), 645 "TRANSIENT": lambda self: self.expression(exp.TransientProperty), 646 "TTL": lambda self: self._parse_ttl(), 647 "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 648 "VOLATILE": lambda self: self._parse_volatile_property(), 649 "WITH": lambda self: self._parse_with_property(), 650 } 651 652 CONSTRAINT_PARSERS = { 653 "AUTOINCREMENT": lambda self: self._parse_auto_increment(), 654 "AUTO_INCREMENT": lambda self: self._parse_auto_increment(), 655 "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False), 656 "CHARACTER SET": lambda self: self.expression( 657 exp.CharacterSetColumnConstraint, this=self._parse_var_or_string() 658 ), 659 "CHECK": lambda self: self.expression( 660 exp.CheckColumnConstraint, this=self._parse_wrapped(self._parse_conjunction) 661 ), 662 "COLLATE": lambda self: self.expression( 663 exp.CollateColumnConstraint, this=self._parse_var() 664 ), 665 "COMMENT": lambda self: 
self.expression( 666 exp.CommentColumnConstraint, this=self._parse_string() 667 ), 668 "COMPRESS": lambda self: self._parse_compress(), 669 "DEFAULT": lambda self: self.expression( 670 exp.DefaultColumnConstraint, this=self._parse_bitwise() 671 ), 672 "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()), 673 "FOREIGN KEY": lambda self: self._parse_foreign_key(), 674 "FORMAT": lambda self: self.expression( 675 exp.DateFormatColumnConstraint, this=self._parse_var_or_string() 676 ), 677 "GENERATED": lambda self: self._parse_generated_as_identity(), 678 "IDENTITY": lambda self: self._parse_auto_increment(), 679 "INLINE": lambda self: self._parse_inline(), 680 "LIKE": lambda self: self._parse_create_like(), 681 "NOT": lambda self: self._parse_not_constraint(), 682 "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True), 683 "ON": lambda self: self._match(TokenType.UPDATE) 684 and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function()), 685 "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()), 686 "PRIMARY KEY": lambda self: self._parse_primary_key(), 687 "REFERENCES": lambda self: self._parse_references(match=False), 688 "TITLE": lambda self: self.expression( 689 exp.TitleColumnConstraint, this=self._parse_var_or_string() 690 ), 691 "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]), 692 "UNIQUE": lambda self: self._parse_unique(), 693 "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint), 694 } 695 696 ALTER_PARSERS = { 697 "ADD": lambda self: self._parse_alter_table_add(), 698 "ALTER": lambda self: self._parse_alter_table_alter(), 699 "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()), 700 "DROP": lambda self: self._parse_alter_table_drop(), 701 "RENAME": lambda self: self._parse_alter_table_rename(), 702 } 703 704 SCHEMA_UNNAMED_CONSTRAINTS = {"CHECK", "FOREIGN 
KEY", "LIKE", "PRIMARY KEY", "UNIQUE"} 705 706 NO_PAREN_FUNCTION_PARSERS = { 707 TokenType.ANY: lambda self: self.expression(exp.Any, this=self._parse_bitwise()), 708 TokenType.CASE: lambda self: self._parse_case(), 709 TokenType.IF: lambda self: self._parse_if(), 710 TokenType.NEXT_VALUE_FOR: lambda self: self.expression( 711 exp.NextValueFor, 712 this=self._parse_column(), 713 order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order), 714 ), 715 } 716 717 FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"} 718 719 FUNCTION_PARSERS: t.Dict[str, t.Callable] = { 720 "CAST": lambda self: self._parse_cast(self.STRICT_CAST), 721 "CONCAT": lambda self: self._parse_concat(), 722 "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST), 723 "DECODE": lambda self: self._parse_decode(), 724 "EXTRACT": lambda self: self._parse_extract(), 725 "JSON_OBJECT": lambda self: self._parse_json_object(), 726 "LOG": lambda self: self._parse_logarithm(), 727 "MATCH": lambda self: self._parse_match_against(), 728 "OPENJSON": lambda self: self._parse_open_json(), 729 "POSITION": lambda self: self._parse_position(), 730 "SAFE_CAST": lambda self: self._parse_cast(False), 731 "STRING_AGG": lambda self: self._parse_string_agg(), 732 "SUBSTRING": lambda self: self._parse_substring(), 733 "TRIM": lambda self: self._parse_trim(), 734 "TRY_CAST": lambda self: self._parse_cast(False), 735 "TRY_CONVERT": lambda self: self._parse_convert(False), 736 } 737 738 QUERY_MODIFIER_PARSERS = { 739 "joins": lambda self: list(iter(self._parse_join, None)), 740 "laterals": lambda self: list(iter(self._parse_lateral, None)), 741 "match": lambda self: self._parse_match_recognize(), 742 "where": lambda self: self._parse_where(), 743 "group": lambda self: self._parse_group(), 744 "having": lambda self: self._parse_having(), 745 "qualify": lambda self: self._parse_qualify(), 746 "windows": lambda self: self._parse_window_clause(), 747 "order": lambda self: self._parse_order(), 748 "limit": lambda 
self: self._parse_limit(), 749 "offset": lambda self: self._parse_offset(), 750 "locks": lambda self: self._parse_locks(), 751 "sample": lambda self: self._parse_table_sample(as_modifier=True), 752 } 753 754 SET_PARSERS = { 755 "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"), 756 "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"), 757 "SESSION": lambda self: self._parse_set_item_assignment("SESSION"), 758 "TRANSACTION": lambda self: self._parse_set_transaction(), 759 } 760 761 SHOW_PARSERS: t.Dict[str, t.Callable] = {} 762 763 TYPE_LITERAL_PARSERS: t.Dict[exp.DataType.Type, t.Callable] = {} 764 765 MODIFIABLES = (exp.Subquery, exp.Subqueryable, exp.Table) 766 767 DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN} 768 769 PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE} 770 771 TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"} 772 TRANSACTION_CHARACTERISTICS = { 773 "ISOLATION LEVEL REPEATABLE READ", 774 "ISOLATION LEVEL READ COMMITTED", 775 "ISOLATION LEVEL READ UNCOMMITTED", 776 "ISOLATION LEVEL SERIALIZABLE", 777 "READ WRITE", 778 "READ ONLY", 779 } 780 781 INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"} 782 783 CLONE_KINDS = {"TIMESTAMP", "OFFSET", "STATEMENT"} 784 785 TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE} 786 787 WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS} 788 WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER} 789 WINDOW_SIDES = {"FOLLOWING", "PRECEDING"} 790 791 ADD_CONSTRAINT_TOKENS = {TokenType.CONSTRAINT, TokenType.PRIMARY_KEY, TokenType.FOREIGN_KEY} 792 793 STRICT_CAST = True 794 795 # A NULL arg in CONCAT yields NULL by default 796 CONCAT_NULL_OUTPUTS_STRING = False 797 798 PREFIXED_PIVOT_COLUMNS = False 799 IDENTIFY_PIVOT_STRINGS = False 800 801 LOG_BASE_FIRST = True 802 LOG_DEFAULTS_TO_LN = False 803 804 __slots__ = ( 805 "error_level", 806 "error_message_context", 807 "max_errors", 808 "sql", 809 
        "errors",
        "_tokens",
        "_index",
        "_curr",
        "_next",
        "_prev",
        "_prev_comments",
    )

    # Autofilled: class-level settings overridden per dialect, not per instance.
    INDEX_OFFSET: int = 0
    UNNEST_COLUMN_ONLY: bool = False
    ALIAS_POST_TABLESAMPLE: bool = False
    STRICT_STRING_CONCAT = False
    NULL_ORDERING: str = "nulls_are_small"
    SHOW_TRIE: t.Dict = {}
    SET_TRIE: t.Dict = {}
    FORMAT_MAPPING: t.Dict[str, str] = {}
    FORMAT_TRIE: t.Dict = {}
    TIME_MAPPING: t.Dict[str, str] = {}
    TIME_TRIE: t.Dict = {}

    def __init__(
        self,
        error_level: t.Optional[ErrorLevel] = None,
        error_message_context: int = 100,
        max_errors: int = 3,
    ):
        """Store error-handling configuration and reset all mutable parser state."""
        self.error_level = error_level or ErrorLevel.IMMEDIATE
        self.error_message_context = error_message_context
        self.max_errors = max_errors
        self.reset()

    def reset(self):
        """Clear the SQL text, accumulated errors, and the token-cursor state."""
        self.sql = ""
        self.errors = []
        self._tokens = []
        self._index = 0
        self._curr = None
        self._next = None
        self._prev = None
        self._prev_comments = None

    def parse(
        self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens and returns a list of syntax trees, one tree
        per parsed SQL statement.

        Args:
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The list of the produced syntax trees.
        """
        return self._parse(
            parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
        )

    def parse_into(
        self,
        expression_types: exp.IntoType,
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens into a given Expression type. If a collection of Expression
        types is given instead, this method will try to parse the token list into each one
        of them, stopping at the first for which the parsing succeeds.

        Args:
            expression_types: The expression type(s) to try and parse the token list into.
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The target Expression.

        Raises:
            TypeError: If no parser is registered for a requested expression type.
            ParseError: If none of the candidate types could be parsed.
        """
        errors = []
        for expression_type in ensure_list(expression_types):
            parser = self.EXPRESSION_PARSERS.get(expression_type)
            if not parser:
                raise TypeError(f"No parser registered for {expression_type}")

            try:
                return self._parse(parser, raw_tokens, sql)
            except ParseError as e:
                # Tag the failure with the type we were attempting, then try the next one.
                e.errors[0]["into_expression"] = expression_type
                errors.append(e)

        # Every candidate failed; surface all attempts, chained to the last failure.
        raise ParseError(
            f"Failed to parse '{sql or raw_tokens}' into {expression_types}",
            errors=merge_errors(errors),
        ) from errors[-1]

    def _parse(
        self,
        parse_method: t.Callable[[Parser], t.Optional[exp.Expression]],
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        """Split the token stream on semicolons and run `parse_method` over each statement."""
        self.reset()
        self.sql = sql or ""

        total = len(raw_tokens)
        chunks: t.List[t.List[Token]] = [[]]

        for i, token in enumerate(raw_tokens):
            if token.token_type == TokenType.SEMICOLON:
                # Start a new chunk, but avoid a trailing empty chunk when the
                # statement list ends with a semicolon.
                if i < total - 1:
                    chunks.append([])
            else:
                chunks[-1].append(token)

        expressions = []

        for tokens in chunks:
            # Position the cursor on the first token of this chunk.
            self._index = -1
            self._tokens = tokens
            self._advance()

            expressions.append(parse_method(self))

            # Leftover tokens after a completed parse mean the statement is invalid.
            if self._index < len(self._tokens):
                self.raise_error("Invalid expression / Unexpected token")

            self.check_errors()

        return expressions

    def check_errors(self) -> None:
        """Logs or raises any found errors, depending on the chosen error level setting."""
        if self.error_level == ErrorLevel.WARN:
            for error in self.errors:
                logger.error(str(error))
        elif self.error_level == ErrorLevel.RAISE and self.errors:
            raise ParseError(
                concat_messages(self.errors, self.max_errors),
errors=merge_errors(self.errors), 950 ) 951 952 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 953 """ 954 Appends an error in the list of recorded errors or raises it, depending on the chosen 955 error level setting. 956 """ 957 token = token or self._curr or self._prev or Token.string("") 958 start = token.start 959 end = token.end + 1 960 start_context = self.sql[max(start - self.error_message_context, 0) : start] 961 highlight = self.sql[start:end] 962 end_context = self.sql[end : end + self.error_message_context] 963 964 error = ParseError.new( 965 f"{message}. Line {token.line}, Col: {token.col}.\n" 966 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 967 description=message, 968 line=token.line, 969 col=token.col, 970 start_context=start_context, 971 highlight=highlight, 972 end_context=end_context, 973 ) 974 975 if self.error_level == ErrorLevel.IMMEDIATE: 976 raise error 977 978 self.errors.append(error) 979 980 def expression( 981 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 982 ) -> E: 983 """ 984 Creates a new, validated Expression. 985 986 Args: 987 exp_class: The expression class to instantiate. 988 comments: An optional list of comments to attach to the expression. 989 kwargs: The arguments to set for the expression along with their respective values. 990 991 Returns: 992 The target expression. 993 """ 994 instance = exp_class(**kwargs) 995 instance.add_comments(comments) if comments else self._add_comments(instance) 996 return self.validate_expression(instance) 997 998 def _add_comments(self, expression: t.Optional[exp.Expression]) -> None: 999 if expression and self._prev_comments: 1000 expression.add_comments(self._prev_comments) 1001 self._prev_comments = None 1002 1003 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1004 """ 1005 Validates an Expression, making sure that all its mandatory arguments are set. 
        Args:
            expression: The expression to validate.
            args: An optional list of items that was used to instantiate the expression, if it's a Func.

        Returns:
            The validated expression.
        """
        if self.error_level != ErrorLevel.IGNORE:
            for error_message in expression.error_messages(args):
                self.raise_error(error_message)

        return expression

    def _find_sql(self, start: Token, end: Token) -> str:
        # Slice of the original SQL text spanning the two tokens, inclusive.
        return self.sql[start.start : end.end + 1]

    def _advance(self, times: int = 1) -> None:
        # Move the cursor `times` tokens forward and refresh the _curr/_next/_prev caches.
        self._index += times
        self._curr = seq_get(self._tokens, self._index)
        self._next = seq_get(self._tokens, self._index + 1)

        if self._index > 0:
            self._prev = self._tokens[self._index - 1]
            self._prev_comments = self._prev.comments
        else:
            self._prev = None
            self._prev_comments = None

    def _retreat(self, index: int) -> None:
        # Rewind (or advance) the cursor to an absolute token index.
        if index != self._index:
            self._advance(index - self._index)

    def _parse_command(self) -> exp.Command:
        # Fallback: wrap the matched keyword plus the rest of the statement as an opaque command.
        return self.expression(exp.Command, this=self._prev.text, expression=self._parse_string())

    def _parse_comment(self, allow_exists: bool = True) -> exp.Expression:
        """Parses a COMMENT ON <kind> <object> IS <string> statement."""
        start = self._prev
        exists = self._parse_exists() if allow_exists else None

        self._match(TokenType.ON)

        kind = self._match_set(self.CREATABLES) and self._prev
        if not kind:
            return self._parse_as_command(start)

        if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=kind.token_type)
        elif kind.token_type == TokenType.TABLE:
            this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS)
        elif kind.token_type == TokenType.COLUMN:
            this = self._parse_column()
        else:
            this = self._parse_id_var()

        self._match(TokenType.IS)

        return self.expression(
            exp.Comment, this=this, kind=kind.text, expression=self._parse_string(), exists=exists
        )

    def _parse_to_table(
        self,
    ) -> exp.ToTableProperty:
        table = self._parse_table_parts(schema=True)
        return self.expression(exp.ToTableProperty, this=table)

    # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl
    def _parse_ttl(self) -> exp.Expression:
        """Parses a ClickHouse MergeTree TTL clause into exp.MergeTreeTTL."""

        def _parse_ttl_action() -> t.Optional[exp.Expression]:
            # A TTL entry is an expression optionally followed by an action keyword.
            this = self._parse_bitwise()

            if self._match_text_seq("DELETE"):
                return self.expression(exp.MergeTreeTTLAction, this=this, delete=True)
            if self._match_text_seq("RECOMPRESS"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise()
                )
            if self._match_text_seq("TO", "DISK"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string()
                )
            if self._match_text_seq("TO", "VOLUME"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string()
                )

            return this

        expressions = self._parse_csv(_parse_ttl_action)
        where = self._parse_where()
        group = self._parse_group()

        aggregates = None
        if group and self._match(TokenType.SET):
            aggregates = self._parse_csv(self._parse_set_item)

        return self.expression(
            exp.MergeTreeTTL,
            expressions=expressions,
            where=where,
            group=group,
            aggregates=aggregates,
        )

    def _parse_statement(self) -> t.Optional[exp.Expression]:
        """Parses one statement: a registered statement parser, a command, or an expression."""
        if self._curr is None:
            return None

        if self._match_set(self.STATEMENT_PARSERS):
            return self.STATEMENT_PARSERS[self._prev.token_type](self)

        if self._match_set(Tokenizer.COMMANDS):
            return self._parse_command()

        expression = self._parse_expression()
        expression = self._parse_set_operations(expression) if expression else self._parse_select()
        return self._parse_query_modifiers(expression)
    def _parse_drop(self) -> exp.Drop | exp.Command:
        """Parses a DROP statement; falls back to an opaque command for unknown kinds."""
        start = self._prev
        temporary = self._match(TokenType.TEMPORARY)
        materialized = self._match_text_seq("MATERIALIZED")

        kind = self._match_set(self.CREATABLES) and self._prev.text
        if not kind:
            return self._parse_as_command(start)

        return self.expression(
            exp.Drop,
            exists=self._parse_exists(),
            this=self._parse_table(schema=True),
            kind=kind,
            temporary=temporary,
            materialized=materialized,
            cascade=self._match_text_seq("CASCADE"),
            constraints=self._match_text_seq("CONSTRAINTS"),
            purge=self._match_text_seq("PURGE"),
        )

    def _parse_exists(self, not_: bool = False) -> t.Optional[bool]:
        # Matches IF [NOT] EXISTS; truthy only when the full sequence is present.
        return (
            self._match(TokenType.IF)
            and (not not_ or self._match(TokenType.NOT))
            and self._match(TokenType.EXISTS)
        )

    def _parse_create(self) -> exp.Create | exp.Command:
        """Parses CREATE [OR REPLACE] [UNIQUE] <kind> ... into exp.Create."""
        # Note: this can't be None because we've matched a statement parser
        start = self._prev
        replace = start.text.upper() == "REPLACE" or self._match_pair(
            TokenType.OR, TokenType.REPLACE
        )
        unique = self._match(TokenType.UNIQUE)

        # CREATE TABLE FUNCTION: skip the TABLE token so FUNCTION is the creatable.
        if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False):
            self._advance()

        properties = None
        create_token = self._match_set(self.CREATABLES) and self._prev

        if not create_token:
            # exp.Properties.Location.POST_CREATE
            properties = self._parse_properties()
            create_token = self._match_set(self.CREATABLES) and self._prev

            if not properties or not create_token:
                return self._parse_as_command(start)

        exists = self._parse_exists(not_=True)
        this = None
        expression = None
        indexes = None
        no_schema_binding = None
        begin = None
        clone = None

        def extend_props(temp_props: t.Optional[exp.Properties]) -> None:
            # Accumulate properties parsed at different clause positions into one node.
            nonlocal properties
            if properties and temp_props:
                properties.expressions.extend(temp_props.expressions)
            elif temp_props:
                properties = temp_props

        if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=create_token.token_type)

            # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature)
            extend_props(self._parse_properties())

            self._match(TokenType.ALIAS)
            begin = self._match(TokenType.BEGIN)
            return_ = self._match_text_seq("RETURN")
            expression = self._parse_statement()

            if return_:
                expression = self.expression(exp.Return, this=expression)
        elif create_token.token_type == TokenType.INDEX:
            this = self._parse_index(index=self._parse_id_var())
        elif create_token.token_type in self.DB_CREATABLES:
            table_parts = self._parse_table_parts(schema=True)

            # exp.Properties.Location.POST_NAME
            self._match(TokenType.COMMA)
            extend_props(self._parse_properties(before=True))

            this = self._parse_schema(this=table_parts)

            # exp.Properties.Location.POST_SCHEMA and POST_WITH
            extend_props(self._parse_properties())

            self._match(TokenType.ALIAS)
            if not self._match_set(self.DDL_SELECT_TOKENS, advance=False):
                # exp.Properties.Location.POST_ALIAS
                extend_props(self._parse_properties())

            expression = self._parse_ddl_select()

            if create_token.token_type == TokenType.TABLE:
                indexes = []
                while True:
                    index = self._parse_index()

                    # exp.Properties.Location.POST_EXPRESSION and POST_INDEX
                    extend_props(self._parse_properties())

                    if not index:
                        break
                    else:
                        self._match(TokenType.COMMA)
                        indexes.append(index)
            elif create_token.token_type == TokenType.VIEW:
                if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"):
                    no_schema_binding = True

        if self._match_text_seq("CLONE"):
            clone = self._parse_table(schema=True)
            when = self._match_texts({"AT", "BEFORE"}) and self._prev.text.upper()
            clone_kind = (
                self._match(TokenType.L_PAREN)
                and self._match_texts(self.CLONE_KINDS)
                and self._prev.text.upper()
            )
            clone_expression = self._match(TokenType.FARROW) and self._parse_bitwise()
            self._match(TokenType.R_PAREN)
            clone = self.expression(
                exp.Clone, this=clone, when=when, kind=clone_kind, expression=clone_expression
            )

        return self.expression(
            exp.Create,
            this=this,
            kind=create_token.text,
            replace=replace,
            unique=unique,
            expression=expression,
            exists=exists,
            properties=properties,
            indexes=indexes,
            no_schema_binding=no_schema_binding,
            begin=begin,
            clone=clone,
        )

    def _parse_property_before(self) -> t.Optional[exp.Expression]:
        """Parses a property preceded by its modifier keywords (NO/DUAL/BEFORE/...)."""
        # only used for teradata currently
        self._match(TokenType.COMMA)

        kwargs = {
            "no": self._match_text_seq("NO"),
            "dual": self._match_text_seq("DUAL"),
            "before": self._match_text_seq("BEFORE"),
            "default": self._match_text_seq("DEFAULT"),
            "local": (self._match_text_seq("LOCAL") and "LOCAL")
            or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"),
            "after": self._match_text_seq("AFTER"),
            "minimum": self._match_texts(("MIN", "MINIMUM")),
            "maximum": self._match_texts(("MAX", "MAXIMUM")),
        }

        if self._match_texts(self.PROPERTY_PARSERS):
            parser = self.PROPERTY_PARSERS[self._prev.text.upper()]
            try:
                # Only pass the modifiers that actually matched.
                return parser(self, **{k: v for k, v in kwargs.items() if v})
            except TypeError:
                # The parser's signature rejected these modifiers.
                self.raise_error(f"Cannot parse property '{self._prev.text}'")

        return None

    def _parse_property(self) -> t.Optional[exp.Expression]:
        """Parses a single table/DDL property, or a generic key = value assignment."""
        if self._match_texts(self.PROPERTY_PARSERS):
            return self.PROPERTY_PARSERS[self._prev.text.upper()](self)

        if self._match_pair(TokenType.DEFAULT, TokenType.CHARACTER_SET):
            return self._parse_character_set(default=True)

        if self._match_text_seq("COMPOUND", "SORTKEY"):
            return self._parse_sortkey(compound=True)

        if self._match_text_seq("SQL", "SECURITY"):
            return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER"))

        # Generic `name = value` / `'name' = value` property.
        assignment = self._match_pair(
            TokenType.VAR, TokenType.EQ, advance=False
        ) or self._match_pair(TokenType.STRING, TokenType.EQ, advance=False)

        if assignment:
            key = self._parse_var_or_string()
            self._match(TokenType.EQ)
            return self.expression(exp.Property, this=key, value=self._parse_column())

        return None
    def _parse_stored(self) -> exp.FileFormatProperty:
        """Parses STORED AS <format> (optionally with INPUTFORMAT/OUTPUTFORMAT strings)."""
        self._match(TokenType.ALIAS)

        input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None
        output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None

        return self.expression(
            exp.FileFormatProperty,
            this=self.expression(
                exp.InputOutputFormat, input_format=input_format, output_format=output_format
            )
            if input_format or output_format
            else self._parse_var_or_string() or self._parse_number() or self._parse_id_var(),
        )

    def _parse_property_assignment(self, exp_class: t.Type[E]) -> E:
        # Consumes an optional `=` or AS, then wraps the following field in exp_class.
        self._match(TokenType.EQ)
        self._match(TokenType.ALIAS)
        return self.expression(exp_class, this=self._parse_field())

    def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]:
        """Parses consecutive properties into an exp.Properties node, or None if there are none."""
        properties = []
        while True:
            if before:
                prop = self._parse_property_before()
            else:
                prop = self._parse_property()

            if not prop:
                break
            # A single parse may yield one property or a list of them.
            for p in ensure_list(prop):
                properties.append(p)

        if properties:
            return self.expression(exp.Properties, expressions=properties)

        return None

    def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty:
        return self.expression(
            exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION")
        )

    def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty:
        """Disambiguates VOLATILE: a table property vs. a function stability marker."""
        if self._index >= 2:
            pre_volatile_token = self._tokens[self._index - 2]
        else:
            pre_volatile_token = None

        if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS:
            return exp.VolatileProperty()

        return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE"))

    def _parse_with_property(
        self,
    ) -> t.Optional[exp.Expression] | t.List[t.Optional[exp.Expression]]:
        """Parses the property (or wrapped property list) that follows WITH."""
        self._match(TokenType.WITH)
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_property)

        if self._match_text_seq("JOURNAL"):
            return self._parse_withjournaltable()

        if self._match_text_seq("DATA"):
            return self._parse_withdata(no=False)
        elif self._match_text_seq("NO", "DATA"):
            return self._parse_withdata(no=True)

        if not self._next:
            return None

        return self._parse_withisolatedloading()

    # https://dev.mysql.com/doc/refman/8.0/en/create-view.html
    def _parse_definer(self) -> t.Optional[exp.DefinerProperty]:
        self._match(TokenType.EQ)

        user = self._parse_id_var()
        self._match(TokenType.PARAMETER)
        host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text)

        if not user or not host:
            return None

        return exp.DefinerProperty(this=f"{user}@{host}")

    def _parse_withjournaltable(self) -> exp.WithJournalTableProperty:
        self._match(TokenType.TABLE)
        self._match(TokenType.EQ)
        return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts())

    def _parse_log(self, no: bool = False) -> exp.LogProperty:
        return self.expression(exp.LogProperty, no=no)

    def _parse_journal(self, **kwargs) -> exp.JournalProperty:
        return self.expression(exp.JournalProperty, **kwargs)
    def _parse_checksum(self) -> exp.ChecksumProperty:
        """Parses CHECKSUM [=] ON | OFF [DEFAULT]."""
        self._match(TokenType.EQ)

        # Tri-state: True (ON), False (OFF), or None when neither keyword appears.
        on = None
        if self._match(TokenType.ON):
            on = True
        elif self._match_text_seq("OFF"):
            on = False

        return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT))

    def _parse_cluster(self) -> exp.Cluster:
        return self.expression(exp.Cluster, expressions=self._parse_csv(self._parse_ordered))

    def _parse_clustered_by(self) -> exp.ClusteredByProperty:
        """Parses CLUSTERED BY (cols) [SORTED BY (ordered)] INTO <n> BUCKETS."""
        self._match_text_seq("BY")

        self._match_l_paren()
        expressions = self._parse_csv(self._parse_column)
        self._match_r_paren()

        if self._match_text_seq("SORTED", "BY"):
            self._match_l_paren()
            sorted_by = self._parse_csv(self._parse_ordered)
            self._match_r_paren()
        else:
            sorted_by = None

        self._match(TokenType.INTO)
        buckets = self._parse_number()
        self._match_text_seq("BUCKETS")

        return self.expression(
            exp.ClusteredByProperty,
            expressions=expressions,
            sorted_by=sorted_by,
            buckets=buckets,
        )

    def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]:
        # COPY must be followed by GRANTS; otherwise back off the COPY token.
        if not self._match_text_seq("GRANTS"):
            self._retreat(self._index - 1)
            return None

        return self.expression(exp.CopyGrantsProperty)

    def _parse_freespace(self) -> exp.FreespaceProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT)
        )

    def _parse_mergeblockratio(
        self, no: bool = False, default: bool = False
    ) -> exp.MergeBlockRatioProperty:
        if self._match(TokenType.EQ):
            return self.expression(
                exp.MergeBlockRatioProperty,
                this=self._parse_number(),
                percent=self._match(TokenType.PERCENT),
            )

        return self.expression(exp.MergeBlockRatioProperty, no=no, default=default)

    def _parse_datablocksize(
        self,
        default: t.Optional[bool] = None,
        minimum: t.Optional[bool] = None,
        maximum: t.Optional[bool] = None,
    ) -> exp.DataBlocksizeProperty:
        self._match(TokenType.EQ)
        size = self._parse_number()

        units = None
        if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")):
            units = self._prev.text

        return self.expression(
            exp.DataBlocksizeProperty,
            size=size,
            units=units,
            default=default,
            minimum=minimum,
            maximum=maximum,
        )

    def _parse_blockcompression(self) -> exp.BlockCompressionProperty:
        """Parses BLOCKCOMPRESSION [=] ALWAYS|MANUAL|NEVER|DEFAULT [AUTOTEMP (...)]."""
        self._match(TokenType.EQ)
        always = self._match_text_seq("ALWAYS")
        manual = self._match_text_seq("MANUAL")
        never = self._match_text_seq("NEVER")
        default = self._match_text_seq("DEFAULT")

        autotemp = None
        if self._match_text_seq("AUTOTEMP"):
            autotemp = self._parse_schema()

        return self.expression(
            exp.BlockCompressionProperty,
            always=always,
            manual=manual,
            never=never,
            default=default,
            autotemp=autotemp,
        )

    def _parse_withisolatedloading(self) -> exp.IsolatedLoadingProperty:
        """Parses WITH [NO] [CONCURRENT] ISOLATED LOADING [FOR ALL|INSERT|NONE]."""
        no = self._match_text_seq("NO")
        concurrent = self._match_text_seq("CONCURRENT")
        self._match_text_seq("ISOLATED", "LOADING")
        for_all = self._match_text_seq("FOR", "ALL")
        for_insert = self._match_text_seq("FOR", "INSERT")
        for_none = self._match_text_seq("FOR", "NONE")
        return self.expression(
            exp.IsolatedLoadingProperty,
            no=no,
            concurrent=concurrent,
            for_all=for_all,
            for_insert=for_insert,
            for_none=for_none,
        )

    def _parse_locking(self) -> exp.LockingProperty:
        """Parses a LOCKING clause: target kind/name, FOR|IN, lock type, OVERRIDE."""
        if self._match(TokenType.TABLE):
            kind = "TABLE"
        elif self._match(TokenType.VIEW):
            kind = "VIEW"
        elif self._match(TokenType.ROW):
            kind = "ROW"
        elif self._match_text_seq("DATABASE"):
            kind = "DATABASE"
        else:
            kind = None

        # Only named targets carry a table reference (ROW does not).
        if kind in ("DATABASE", "TABLE", "VIEW"):
            this = self._parse_table_parts()
        else:
            this = None

        if self._match(TokenType.FOR):
            for_or_in = "FOR"
        elif self._match(TokenType.IN):
            for_or_in = "IN"
        else:
            for_or_in = None

        if self._match_text_seq("ACCESS"):
            lock_type = "ACCESS"
        elif self._match_texts(("EXCL", "EXCLUSIVE")):
            lock_type = "EXCLUSIVE"
        elif self._match_text_seq("SHARE"):
            lock_type = "SHARE"
        elif self._match_text_seq("READ"):
            lock_type = "READ"
        elif self._match_text_seq("WRITE"):
            lock_type = "WRITE"
        elif self._match_text_seq("CHECKSUM"):
            lock_type = "CHECKSUM"
        else:
            lock_type = None

        override = self._match_text_seq("OVERRIDE")

        return self.expression(
            exp.LockingProperty,
            this=this,
            kind=kind,
            for_or_in=for_or_in,
            lock_type=lock_type,
            override=override,
        )
    def _parse_partition_by(self) -> t.List[t.Optional[exp.Expression]]:
        # Returns the PARTITION BY expressions, or an empty list when absent.
        if self._match(TokenType.PARTITION_BY):
            return self._parse_csv(self._parse_conjunction)
        return []

    def _parse_partitioned_by(self) -> exp.PartitionedByProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.PartitionedByProperty,
            this=self._parse_schema() or self._parse_bracket(self._parse_field()),
        )

    def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty:
        """Parses WITH [NO] DATA [AND [NO] STATISTICS]."""
        if self._match_text_seq("AND", "STATISTICS"):
            statistics = True
        elif self._match_text_seq("AND", "NO", "STATISTICS"):
            statistics = False
        else:
            statistics = None

        return self.expression(exp.WithDataProperty, no=no, statistics=statistics)

    def _parse_no_property(self) -> t.Optional[exp.NoPrimaryIndexProperty]:
        if self._match_text_seq("PRIMARY", "INDEX"):
            return exp.NoPrimaryIndexProperty()
        return None

    def _parse_on_property(self) -> t.Optional[exp.Expression]:
        if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"):
            return exp.OnCommitProperty()
        elif self._match_text_seq("COMMIT", "DELETE", "ROWS"):
            return exp.OnCommitProperty(delete=True)
        return None

    def _parse_distkey(self) -> exp.DistKeyProperty:
        return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var))

    def _parse_create_like(self) -> t.Optional[exp.LikeProperty]:
        """Parses LIKE <table> [INCLUDING|EXCLUDING <option> ...]."""
        table = self._parse_table(schema=True)

        options = []
        while self._match_texts(("INCLUDING", "EXCLUDING")):
            this = self._prev.text.upper()

            id_var = self._parse_id_var()
            if not id_var:
                return None

            options.append(
                self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper()))
            )

        return self.expression(exp.LikeProperty, this=table, expressions=options)

    def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty:
        return self.expression(
            exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound
        )

    def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default
        )

    def _parse_returns(self) -> exp.ReturnsProperty:
        """Parses a RETURNS clause: a scalar type, TABLE<...>, or TABLE (schema)."""
        value: t.Optional[exp.Expression]
        is_table = self._match(TokenType.TABLE)

        if is_table:
            if self._match(TokenType.LT):
                value = self.expression(
                    exp.Schema,
                    this="TABLE",
                    expressions=self._parse_csv(self._parse_struct_types),
                )
                if not self._match(TokenType.GT):
                    self.raise_error("Expecting >")
            else:
                value = self._parse_schema(exp.var("TABLE"))
        else:
            value = self._parse_types()

        return self.expression(exp.ReturnsProperty, this=value, is_table=is_table)
    def _parse_describe(self) -> exp.Describe:
        kind = self._match_set(self.CREATABLES) and self._prev.text
        this = self._parse_table()
        return self.expression(exp.Describe, this=this, kind=kind)

    def _parse_insert(self) -> exp.Insert:
        """Parses INSERT [OVERWRITE] [OR <alt>] INTO <table|DIRECTORY> ... <select>."""
        overwrite = self._match(TokenType.OVERWRITE)
        local = self._match_text_seq("LOCAL")
        alternative = None

        if self._match_text_seq("DIRECTORY"):
            # INSERT ... DIRECTORY 'path' targets a filesystem path, not a table.
            this: t.Optional[exp.Expression] = self.expression(
                exp.Directory,
                this=self._parse_var_or_string(),
                local=local,
                row_format=self._parse_row_format(match_row=True),
            )
        else:
            if self._match(TokenType.OR):
                alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text

            self._match(TokenType.INTO)
            self._match(TokenType.TABLE)
            this = self._parse_table(schema=True)

        return self.expression(
            exp.Insert,
            this=this,
            exists=self._parse_exists(),
            partition=self._parse_partition(),
            where=self._match_pair(TokenType.REPLACE, TokenType.WHERE)
            and self._parse_conjunction(),
            expression=self._parse_ddl_select(),
            conflict=self._parse_on_conflict(),
            returning=self._parse_returning(),
            overwrite=overwrite,
            alternative=alternative,
        )

    def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]:
        """Parses ON CONFLICT ... DO ... or ON DUPLICATE KEY UPDATE ... clauses."""
        conflict = self._match_text_seq("ON", "CONFLICT")
        duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY")

        if not conflict and not duplicate:
            return None

        nothing = None
        expressions = None
        key = None
        constraint = None

        if conflict:
            if self._match_text_seq("ON", "CONSTRAINT"):
                constraint = self._parse_id_var()
            else:
                key = self._parse_csv(self._parse_value)

        self._match_text_seq("DO")
        if self._match_text_seq("NOTHING"):
            nothing = True
        else:
            self._match(TokenType.UPDATE)
            expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality)

        return self.expression(
            exp.OnConflict,
            duplicate=duplicate,
            expressions=expressions,
            nothing=nothing,
            key=key,
            constraint=constraint,
        )

    def _parse_returning(self) -> t.Optional[exp.Returning]:
        if not self._match(TokenType.RETURNING):
            return None

        return self.expression(exp.Returning, expressions=self._parse_csv(self._parse_column))

    def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]:
        if not self._match(TokenType.FORMAT):
            return None
        return self._parse_row_format()

    def _parse_row_format(
        self, match_row: bool = False
    ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]:
        """Parses a [ROW] FORMAT SERDE '...' or FORMAT DELIMITED ... clause."""
        if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT):
            return None

        if self._match_text_seq("SERDE"):
            return self.expression(exp.RowFormatSerdeProperty, this=self._parse_string())

        self._match_text_seq("DELIMITED")

        kwargs = {}

        if self._match_text_seq("FIELDS", "TERMINATED", "BY"):
            kwargs["fields"] = self._parse_string()
            if self._match_text_seq("ESCAPED", "BY"):
                kwargs["escaped"] = self._parse_string()
        if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"):
            kwargs["collection_items"] = self._parse_string()
        if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"):
            kwargs["map_keys"] = self._parse_string()
        if self._match_text_seq("LINES", "TERMINATED", "BY"):
            kwargs["lines"] = self._parse_string()
        if self._match_text_seq("NULL", "DEFINED", "AS"):
            kwargs["null"] = self._parse_string()

        return self.expression(exp.RowFormatDelimitedProperty, **kwargs)  # type: ignore

    def _parse_load(self) -> exp.LoadData | exp.Command:
        """Parses LOAD DATA [LOCAL] INPATH ... INTO TABLE ...; otherwise an opaque command."""
        if self._match_text_seq("DATA"):
            local = self._match_text_seq("LOCAL")
            self._match_text_seq("INPATH")
            inpath = self._parse_string()
            overwrite = self._match(TokenType.OVERWRITE)
            self._match_pair(TokenType.INTO, TokenType.TABLE)

            return self.expression(
                exp.LoadData,
                this=self._parse_table(schema=True),
                local=local,
                overwrite=overwrite,
                inpath=inpath,
                partition=self._parse_partition(),
                input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(),
                serde=self._match_text_seq("SERDE") and self._parse_string(),
            )
        return self._parse_as_command(self._prev)
    def _parse_delete(self) -> exp.Delete:
        self._match(TokenType.FROM)

        return self.expression(
            exp.Delete,
            this=self._parse_table(),
            using=self._parse_csv(lambda: self._match(TokenType.USING) and self._parse_table()),
            where=self._parse_where(),
            returning=self._parse_returning(),
            limit=self._parse_limit(),
        )

    def _parse_update(self) -> exp.Update:
        return self.expression(
            exp.Update,
            **{  # type: ignore
                "this": self._parse_table(alias_tokens=self.UPDATE_ALIAS_TOKENS),
                "expressions": self._match(TokenType.SET) and self._parse_csv(self._parse_equality),
                "from": self._parse_from(modifiers=True),
                "where": self._parse_where(),
                "returning": self._parse_returning(),
                "limit": self._parse_limit(),
            },
        )

    def _parse_uncache(self) -> exp.Uncache:
        if not self._match(TokenType.TABLE):
            self.raise_error("Expecting TABLE after UNCACHE")

        return self.expression(
            exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True)
        )

    def _parse_cache(self) -> exp.Cache:
        """Parses CACHE [LAZY] TABLE <table> [OPTIONS ('k' = 'v')] [AS <select>]."""
        lazy = self._match_text_seq("LAZY")
        self._match(TokenType.TABLE)
        table = self._parse_table(schema=True)

        options = []
        if self._match_text_seq("OPTIONS"):
            self._match_l_paren()
            k = self._parse_string()
            self._match(TokenType.EQ)
            v = self._parse_string()
            options = [k, v]
            self._match_r_paren()

        self._match(TokenType.ALIAS)
        return self.expression(
            exp.Cache,
            this=table,
            lazy=lazy,
            options=options,
            expression=self._parse_select(nested=True),
        )

    def _parse_partition(self) -> t.Optional[exp.Partition]:
        if not self._match(TokenType.PARTITION):
            return None

        return self.expression(
            exp.Partition, expressions=self._parse_wrapped_csv(self._parse_conjunction)
        )

    def _parse_value(self) -> exp.Tuple:
        if self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(self._parse_conjunction)
            self._match_r_paren()
            return self.expression(exp.Tuple, expressions=expressions)

        # In presto we can have VALUES 1, 2 which results in 1 column & 2 rows.
        # Source: https://prestodb.io/docs/current/sql/values.html
        return self.expression(exp.Tuple, expressions=[self._parse_conjunction()])

    def _parse_select(
        self, nested: bool = False, table: bool = False, parse_subquery_alias: bool = True
    ) -> t.Optional[exp.Expression]:
        """Parses a SELECT/VALUES statement (optionally CTE-prefixed or parenthesized)."""
        cte = self._parse_with()
        if cte:
            this = self._parse_statement()

            if not this:
                self.raise_error("Failed to parse any statement following CTE")
                return cte

            if "with" in this.arg_types:
                this.set("with", cte)
            else:
                self.raise_error(f"{this.key} does not support CTE")
                this = cte
        elif self._match(TokenType.SELECT):
            comments = self._prev_comments

            hint = self._parse_hint()
            all_ = self._match(TokenType.ALL)
            distinct = self._match(TokenType.DISTINCT)

            # e.g. BigQuery's SELECT AS STRUCT / AS VALUE.
            kind = (
                self._match(TokenType.ALIAS)
                and self._match_texts(("STRUCT", "VALUE"))
                and self._prev.text
            )

            if distinct:
                distinct = self.expression(
                    exp.Distinct,
                    on=self._parse_value() if self._match(TokenType.ON) else None,
                )

            if all_ and distinct:
                self.raise_error("Cannot specify both ALL and DISTINCT after SELECT")

            limit = self._parse_limit(top=True)
            expressions = self._parse_csv(self._parse_expression)

            this = self.expression(
                exp.Select,
                kind=kind,
                hint=hint,
                distinct=distinct,
                expressions=expressions,
                limit=limit,
            )
            this.comments = comments

            into = self._parse_into()
            if into:
                this.set("into", into)

            from_ = self._parse_from()
            if from_:
                this.set("from", from_)

            this = self._parse_query_modifiers(this)
        elif (table or nested) and self._match(TokenType.L_PAREN):
            if self._match(TokenType.PIVOT):
                this = self._parse_simplified_pivot()
            elif self._match(TokenType.FROM):
                this = exp.select("*").from_(
                    t.cast(exp.From, self._parse_from(skip_from_token=True))
                )
            else:
                this = self._parse_table() if table else self._parse_select(nested=True)
                this = self._parse_set_operations(self._parse_query_modifiers(this))

            self._match_r_paren()

            # early return so that subquery unions aren't parsed again
            # SELECT * FROM (SELECT 1) UNION ALL SELECT 1
            # Union ALL should be a property of the top select node, not the subquery
            return self._parse_subquery(this, parse_alias=parse_subquery_alias)
        elif self._match(TokenType.VALUES):
            this = self.expression(
                exp.Values,
                expressions=self._parse_csv(self._parse_value),
                alias=self._parse_table_alias(),
            )
        else:
            this = None

        return self._parse_set_operations(this)
    def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]:
        """Parses a WITH [RECURSIVE] clause and its comma-separated CTEs."""
        if not skip_with_token and not self._match(TokenType.WITH):
            return None

        comments = self._prev_comments
        recursive = self._match(TokenType.RECURSIVE)

        expressions = []
        while True:
            expressions.append(self._parse_cte())

            if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH):
                break
            else:
                # Tolerate a redundant WITH between CTEs.
                self._match(TokenType.WITH)

        return self.expression(
            exp.With, comments=comments, expressions=expressions, recursive=recursive
        )

    def _parse_cte(self) -> exp.CTE:
        """Parses one CTE: <alias> AS (<statement>)."""
        alias = self._parse_table_alias()
        if not alias or not alias.this:
            self.raise_error("Expected CTE to have alias")

        self._match(TokenType.ALIAS)
        return self.expression(
            exp.CTE, this=self._parse_wrapped(self._parse_statement), alias=alias
        )

    def _parse_table_alias(
        self, alias_tokens: t.Optional[t.Collection[TokenType]] = None
    ) -> t.Optional[exp.TableAlias]:
        """Parses [AS] <alias> [(col, ...)] into exp.TableAlias, or None if absent."""
        any_token = self._match(TokenType.ALIAS)
        alias = (
            self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
            or self._parse_string_as_identifier()
        )

        index = self._index
        if self._match(TokenType.L_PAREN):
            columns = self._parse_csv(self._parse_function_parameter)
            # An empty parenthesis wasn't a column list; rewind to before it.
            self._match_r_paren() if columns else self._retreat(index)
        else:
            columns = None

        if not alias and not columns:
            return None

        return self.expression(exp.TableAlias, this=alias, columns=columns)

    def _parse_subquery(
        self, this: t.Optional[exp.Expression], parse_alias: bool = True
    ) -> t.Optional[exp.Subquery]:
        if not this:
            return None

        return self.expression(
            exp.Subquery,
            this=this,
            pivots=self._parse_pivots(),
            alias=self._parse_table_alias() if parse_alias else None,
        )

    def _parse_query_modifiers(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        """Attaches trailing modifiers (joins, where, group, limit, ...) to a modifiable node."""
        if isinstance(this, self.MODIFIABLES):
            for key, parser in self.QUERY_MODIFIER_PARSERS.items():
                expression = parser(self)

                if expression:
                    if key == "limit":
                        # LIMIT x OFFSET y is parsed together; split the offset out.
                        offset = expression.args.pop("offset", None)
                        if offset:
                            this.set("offset", exp.Offset(expression=offset))
                    this.set(key, expression)
        return this
expression.args.pop("offset", None) 2042 if offset: 2043 this.set("offset", exp.Offset(expression=offset)) 2044 this.set(key, expression) 2045 return this 2046 2047 def _parse_hint(self) -> t.Optional[exp.Hint]: 2048 if self._match(TokenType.HINT): 2049 hints = self._parse_csv(self._parse_function) 2050 2051 if not self._match_pair(TokenType.STAR, TokenType.SLASH): 2052 self.raise_error("Expected */ after HINT") 2053 2054 return self.expression(exp.Hint, expressions=hints) 2055 2056 return None 2057 2058 def _parse_into(self) -> t.Optional[exp.Into]: 2059 if not self._match(TokenType.INTO): 2060 return None 2061 2062 temp = self._match(TokenType.TEMPORARY) 2063 unlogged = self._match_text_seq("UNLOGGED") 2064 self._match(TokenType.TABLE) 2065 2066 return self.expression( 2067 exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged 2068 ) 2069 2070 def _parse_from( 2071 self, modifiers: bool = False, skip_from_token: bool = False 2072 ) -> t.Optional[exp.From]: 2073 if not skip_from_token and not self._match(TokenType.FROM): 2074 return None 2075 2076 comments = self._prev_comments 2077 this = self._parse_table() 2078 2079 return self.expression( 2080 exp.From, 2081 comments=comments, 2082 this=self._parse_query_modifiers(this) if modifiers else this, 2083 ) 2084 2085 def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]: 2086 if not self._match(TokenType.MATCH_RECOGNIZE): 2087 return None 2088 2089 self._match_l_paren() 2090 2091 partition = self._parse_partition_by() 2092 order = self._parse_order() 2093 measures = ( 2094 self._parse_csv(self._parse_expression) if self._match_text_seq("MEASURES") else None 2095 ) 2096 2097 if self._match_text_seq("ONE", "ROW", "PER", "MATCH"): 2098 rows = exp.var("ONE ROW PER MATCH") 2099 elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"): 2100 text = "ALL ROWS PER MATCH" 2101 if self._match_text_seq("SHOW", "EMPTY", "MATCHES"): 2102 text += f" SHOW EMPTY MATCHES" 2103 elif 
self._match_text_seq("OMIT", "EMPTY", "MATCHES"): 2104 text += f" OMIT EMPTY MATCHES" 2105 elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"): 2106 text += f" WITH UNMATCHED ROWS" 2107 rows = exp.var(text) 2108 else: 2109 rows = None 2110 2111 if self._match_text_seq("AFTER", "MATCH", "SKIP"): 2112 text = "AFTER MATCH SKIP" 2113 if self._match_text_seq("PAST", "LAST", "ROW"): 2114 text += f" PAST LAST ROW" 2115 elif self._match_text_seq("TO", "NEXT", "ROW"): 2116 text += f" TO NEXT ROW" 2117 elif self._match_text_seq("TO", "FIRST"): 2118 text += f" TO FIRST {self._advance_any().text}" # type: ignore 2119 elif self._match_text_seq("TO", "LAST"): 2120 text += f" TO LAST {self._advance_any().text}" # type: ignore 2121 after = exp.var(text) 2122 else: 2123 after = None 2124 2125 if self._match_text_seq("PATTERN"): 2126 self._match_l_paren() 2127 2128 if not self._curr: 2129 self.raise_error("Expecting )", self._curr) 2130 2131 paren = 1 2132 start = self._curr 2133 2134 while self._curr and paren > 0: 2135 if self._curr.token_type == TokenType.L_PAREN: 2136 paren += 1 2137 if self._curr.token_type == TokenType.R_PAREN: 2138 paren -= 1 2139 2140 end = self._prev 2141 self._advance() 2142 2143 if paren > 0: 2144 self.raise_error("Expecting )", self._curr) 2145 2146 pattern = exp.var(self._find_sql(start, end)) 2147 else: 2148 pattern = None 2149 2150 define = ( 2151 self._parse_csv( 2152 lambda: self.expression( 2153 exp.Alias, 2154 alias=self._parse_id_var(any_token=True), 2155 this=self._match(TokenType.ALIAS) and self._parse_conjunction(), 2156 ) 2157 ) 2158 if self._match_text_seq("DEFINE") 2159 else None 2160 ) 2161 2162 self._match_r_paren() 2163 2164 return self.expression( 2165 exp.MatchRecognize, 2166 partition_by=partition, 2167 order=order, 2168 measures=measures, 2169 rows=rows, 2170 after=after, 2171 pattern=pattern, 2172 define=define, 2173 alias=self._parse_table_alias(), 2174 ) 2175 2176 def _parse_lateral(self) -> t.Optional[exp.Lateral]: 2177 
        # LATERAL comes in three surface forms: OUTER APPLY / CROSS APPLY (T-SQL)
        # and the standard LATERAL keyword (optionally VIEW/OUTER, Hive-style).
        outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY)
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY)

        if outer_apply or cross_apply:
            this = self._parse_select(table=True)
            view = None
            outer = not cross_apply
        elif self._match(TokenType.LATERAL):
            this = self._parse_select(table=True)
            view = self._match(TokenType.VIEW)
            outer = self._match(TokenType.OUTER)
        else:
            return None

        if not this:
            # Not a subquery: a (possibly dotted) table function / identifier chain.
            this = self._parse_function() or self._parse_id_var(any_token=False)
            while self._match(TokenType.DOT):
                this = exp.Dot(
                    this=this,
                    expression=self._parse_function() or self._parse_id_var(any_token=False),
                )

        if view:
            table = self._parse_id_var(any_token=False)
            columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else []
            table_alias: t.Optional[exp.TableAlias] = self.expression(
                exp.TableAlias, this=table, columns=columns
            )
        elif isinstance(this, exp.Subquery) and this.alias:
            # Ensures parity between the Subquery's and the Lateral's "alias" args
            table_alias = this.args["alias"].copy()
        else:
            table_alias = self._parse_table_alias()

        return self.expression(exp.Lateral, this=this, view=view, outer=outer, alias=table_alias)

    def _parse_join_parts(
        self,
    ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]:
        """Match the (method, side, kind) token triple that may precede JOIN."""
        return (
            self._match_set(self.JOIN_METHODS) and self._prev,
            self._match_set(self.JOIN_SIDES) and self._prev,
            self._match_set(self.JOIN_KINDS) and self._prev,
        )

    def _parse_join(self, skip_join_token: bool = False) -> t.Optional[exp.Join]:
        """Parse one join clause (comma join, [method/side/kind] JOIN, or APPLY)."""
        if self._match(TokenType.COMMA):
            return self.expression(exp.Join, this=self._parse_table())

        index = self._index
        method, side, kind = self._parse_join_parts()
        hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None
        join = self._match(TokenType.JOIN)

        if not skip_join_token and not join:
            # The matched modifiers weren't followed by JOIN: undo them.
            self._retreat(index)
            kind = None
            method = None
            side = None

        outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False)
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False)

        if not skip_join_token and not join and not outer_apply and not cross_apply:
            return None

        if outer_apply:
            # OUTER APPLY is represented as a LEFT join.
            side = Token(TokenType.LEFT, "LEFT")

        kwargs: t.Dict[str, t.Any] = {"this": self._parse_table()}

        if method:
            kwargs["method"] = method.text
        if side:
            kwargs["side"] = side.text
        if kind:
            kwargs["kind"] = kind.text
        if hint:
            kwargs["hint"] = hint

        if self._match(TokenType.ON):
            kwargs["on"] = self._parse_conjunction()
        elif self._match(TokenType.USING):
            kwargs["using"] = self._parse_wrapped_id_vars()

        return self.expression(exp.Join, **kwargs)

    def _parse_index(
        self,
        index: t.Optional[exp.Expression] = None,
    ) -> t.Optional[exp.Index]:
        """Parse an index definition; when `index` is given, only the target table follows."""
        if index:
            unique = None
            primary = None
            amp = None

            self._match(TokenType.ON)
            self._match(TokenType.TABLE)  # hive
            table = self._parse_table_parts(schema=True)
        else:
            unique = self._match(TokenType.UNIQUE)
            primary = self._match_text_seq("PRIMARY")
            amp = self._match_text_seq("AMP")

            if not self._match(TokenType.INDEX):
                return None

            index = self._parse_id_var()
            table = None

        using = self._parse_field() if self._match(TokenType.USING) else None

        if self._match(TokenType.L_PAREN, advance=False):
            columns = self._parse_wrapped_csv(self._parse_ordered)
        else:
            columns = None

        return self.expression(
            exp.Index,
            this=index,
            table=table,
            using=using,
            columns=columns,
            unique=unique,
            primary=primary,
            amp=amp,
            partition_by=self._parse_partition_by(),
        )

    def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]:
        """Parse T-SQL WITH (...) table hints or MySQL index hints; None if absent."""
        hints: t.List[exp.Expression] = []
        if self._match_pair(TokenType.WITH, TokenType.L_PAREN):
            # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16
            hints.append(
                self.expression(
                    exp.WithTableHint,
                    expressions=self._parse_csv(
                        lambda: self._parse_function() or self._parse_var(any_token=True)
                    ),
                )
            )
            self._match_r_paren()
        else:
            # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html
            while self._match_set(self.TABLE_INDEX_HINT_TOKENS):
                hint = exp.IndexTableHint(this=self._prev.text.upper())

                self._match_texts({"INDEX", "KEY"})
                if self._match(TokenType.FOR):
                    hint.set("target", self._advance_any() and self._prev.text.upper())

                hint.set("expressions", self._parse_wrapped_id_vars())
                hints.append(hint)

        return hints or None

    def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]:
        """Parse one dotted component of a table name (function, identifier, string, placeholder)."""
        return (
            (not schema and self._parse_function(optional_parens=False))
            or self._parse_id_var(any_token=False)
            or self._parse_string_as_identifier()
            or self._parse_placeholder()
        )

    def _parse_table_parts(self, schema: bool = False) -> exp.Table:
        """Parse a possibly qualified table name: [catalog.][db.]table[.more...]."""
        catalog = None
        db = None
        table = self._parse_table_part(schema=schema)

        while self._match(TokenType.DOT):
            if catalog:
                # This allows nesting the table in arbitrarily many dot expressions if needed
                table = self.expression(
                    exp.Dot, this=table, expression=self._parse_table_part(schema=schema)
                )
            else:
                # Shift parts left: previous table becomes db, previous db becomes catalog.
                catalog = db
                db = table
                table = self._parse_table_part(schema=schema)

        if not table:
            self.raise_error(f"Expected table name but got {self._curr}")

        return self.expression(
            exp.Table, this=table, db=db, catalog=catalog, pivots=self._parse_pivots()
        )

    def _parse_table(
        self, schema: bool = False, alias_tokens: t.Optional[t.Collection[TokenType]] = None
    ) -> t.Optional[exp.Expression]:
        """Parse any FROM-clause table factor: lateral, unnest, values, subquery or plain table."""
        lateral = self._parse_lateral()
        if lateral:
            return lateral

        unnest = self._parse_unnest()
        if unnest:
            return unnest

        values = self._parse_derived_table_values()
        if values:
            return values

        subquery = self._parse_select(table=True)
        if subquery:
            if not subquery.args.get("pivots"):
                subquery.set("pivots", self._parse_pivots())
            return subquery

        this: exp.Expression = self._parse_table_parts(schema=schema)

        if schema:
            return self._parse_schema(this=this)

        # Dialect flag decides whether TABLESAMPLE precedes or follows the alias.
        if self.ALIAS_POST_TABLESAMPLE:
            table_sample = self._parse_table_sample()

        alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
        if alias:
            this.set("alias", alias)

        if not this.args.get("pivots"):
            this.set("pivots", self._parse_pivots())

        this.set("hints", self._parse_table_hints())

        if not self.ALIAS_POST_TABLESAMPLE:
            table_sample = self._parse_table_sample()

        if table_sample:
            table_sample.set("this", this)
            this = table_sample

        return this

    def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]:
        """Parse UNNEST(...) [WITH ORDINALITY] [alias] [WITH OFFSET [AS] name]."""
        if not self._match(TokenType.UNNEST):
            return None

        expressions = self._parse_wrapped_csv(self._parse_type)
        ordinality = self._match_pair(TokenType.WITH, TokenType.ORDINALITY)

        alias = self._parse_table_alias() if with_alias else None

        if alias and self.UNNEST_COLUMN_ONLY:
            # In column-only dialects (e.g. the BigQuery behavior behind this flag),
            # the alias names the produced column, not the table.
            if alias.args.get("columns"):
                self.raise_error("Unexpected extra column alias in unnest.")

            alias.set("columns", [alias.this])
            alias.set("this", None)

        offset = None
        if self._match_pair(TokenType.WITH, TokenType.OFFSET):
2429 self._match(TokenType.ALIAS) 2430 offset = self._parse_id_var() or exp.to_identifier("offset") 2431 2432 return self.expression( 2433 exp.Unnest, expressions=expressions, ordinality=ordinality, alias=alias, offset=offset 2434 ) 2435 2436 def _parse_derived_table_values(self) -> t.Optional[exp.Values]: 2437 is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES) 2438 if not is_derived and not self._match(TokenType.VALUES): 2439 return None 2440 2441 expressions = self._parse_csv(self._parse_value) 2442 alias = self._parse_table_alias() 2443 2444 if is_derived: 2445 self._match_r_paren() 2446 2447 return self.expression( 2448 exp.Values, expressions=expressions, alias=alias or self._parse_table_alias() 2449 ) 2450 2451 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 2452 if not self._match(TokenType.TABLE_SAMPLE) and not ( 2453 as_modifier and self._match_text_seq("USING", "SAMPLE") 2454 ): 2455 return None 2456 2457 bucket_numerator = None 2458 bucket_denominator = None 2459 bucket_field = None 2460 percent = None 2461 rows = None 2462 size = None 2463 seed = None 2464 2465 kind = ( 2466 self._prev.text if self._prev.token_type == TokenType.TABLE_SAMPLE else "USING SAMPLE" 2467 ) 2468 method = self._parse_var(tokens=(TokenType.ROW,)) 2469 2470 self._match(TokenType.L_PAREN) 2471 2472 num = self._parse_number() 2473 2474 if self._match_text_seq("BUCKET"): 2475 bucket_numerator = self._parse_number() 2476 self._match_text_seq("OUT", "OF") 2477 bucket_denominator = bucket_denominator = self._parse_number() 2478 self._match(TokenType.ON) 2479 bucket_field = self._parse_field() 2480 elif self._match_set((TokenType.PERCENT, TokenType.MOD)): 2481 percent = num 2482 elif self._match(TokenType.ROWS): 2483 rows = num 2484 else: 2485 size = num 2486 2487 self._match(TokenType.R_PAREN) 2488 2489 if self._match(TokenType.L_PAREN): 2490 method = self._parse_var() 2491 seed = self._match(TokenType.COMMA) and 
self._parse_number() 2492 self._match_r_paren() 2493 elif self._match_texts(("SEED", "REPEATABLE")): 2494 seed = self._parse_wrapped(self._parse_number) 2495 2496 return self.expression( 2497 exp.TableSample, 2498 method=method, 2499 bucket_numerator=bucket_numerator, 2500 bucket_denominator=bucket_denominator, 2501 bucket_field=bucket_field, 2502 percent=percent, 2503 rows=rows, 2504 size=size, 2505 seed=seed, 2506 kind=kind, 2507 ) 2508 2509 def _parse_pivots(self) -> t.List[t.Optional[exp.Expression]]: 2510 return list(iter(self._parse_pivot, None)) 2511 2512 # https://duckdb.org/docs/sql/statements/pivot 2513 def _parse_simplified_pivot(self) -> exp.Pivot: 2514 def _parse_on() -> t.Optional[exp.Expression]: 2515 this = self._parse_bitwise() 2516 return self._parse_in(this) if self._match(TokenType.IN) else this 2517 2518 this = self._parse_table() 2519 expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on) 2520 using = self._match(TokenType.USING) and self._parse_csv( 2521 lambda: self._parse_alias(self._parse_function()) 2522 ) 2523 group = self._parse_group() 2524 return self.expression( 2525 exp.Pivot, this=this, expressions=expressions, using=using, group=group 2526 ) 2527 2528 def _parse_pivot(self) -> t.Optional[exp.Pivot]: 2529 index = self._index 2530 2531 if self._match(TokenType.PIVOT): 2532 unpivot = False 2533 elif self._match(TokenType.UNPIVOT): 2534 unpivot = True 2535 else: 2536 return None 2537 2538 expressions = [] 2539 field = None 2540 2541 if not self._match(TokenType.L_PAREN): 2542 self._retreat(index) 2543 return None 2544 2545 if unpivot: 2546 expressions = self._parse_csv(self._parse_column) 2547 else: 2548 expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function())) 2549 2550 if not expressions: 2551 self.raise_error("Failed to parse PIVOT's aggregation list") 2552 2553 if not self._match(TokenType.FOR): 2554 self.raise_error("Expecting FOR") 2555 2556 value = self._parse_column() 2557 2558 if not 
self._match(TokenType.IN): 2559 self.raise_error("Expecting IN") 2560 2561 field = self._parse_in(value, alias=True) 2562 2563 self._match_r_paren() 2564 2565 pivot = self.expression(exp.Pivot, expressions=expressions, field=field, unpivot=unpivot) 2566 2567 if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False): 2568 pivot.set("alias", self._parse_table_alias()) 2569 2570 if not unpivot: 2571 names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions)) 2572 2573 columns: t.List[exp.Expression] = [] 2574 for fld in pivot.args["field"].expressions: 2575 field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name 2576 for name in names: 2577 if self.PREFIXED_PIVOT_COLUMNS: 2578 name = f"{name}_{field_name}" if name else field_name 2579 else: 2580 name = f"{field_name}_{name}" if name else field_name 2581 2582 columns.append(exp.to_identifier(name)) 2583 2584 pivot.set("columns", columns) 2585 2586 return pivot 2587 2588 def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]: 2589 return [agg.alias for agg in aggregations] 2590 2591 def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]: 2592 if not skip_where_token and not self._match(TokenType.WHERE): 2593 return None 2594 2595 return self.expression( 2596 exp.Where, comments=self._prev_comments, this=self._parse_conjunction() 2597 ) 2598 2599 def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]: 2600 if not skip_group_by_token and not self._match(TokenType.GROUP_BY): 2601 return None 2602 2603 elements = defaultdict(list) 2604 2605 while True: 2606 expressions = self._parse_csv(self._parse_conjunction) 2607 if expressions: 2608 elements["expressions"].extend(expressions) 2609 2610 grouping_sets = self._parse_grouping_sets() 2611 if grouping_sets: 2612 elements["grouping_sets"].extend(grouping_sets) 2613 2614 rollup = None 2615 cube = None 2616 totals = None 2617 2618 with_ = 
self._match(TokenType.WITH) 2619 if self._match(TokenType.ROLLUP): 2620 rollup = with_ or self._parse_wrapped_csv(self._parse_column) 2621 elements["rollup"].extend(ensure_list(rollup)) 2622 2623 if self._match(TokenType.CUBE): 2624 cube = with_ or self._parse_wrapped_csv(self._parse_column) 2625 elements["cube"].extend(ensure_list(cube)) 2626 2627 if self._match_text_seq("TOTALS"): 2628 totals = True 2629 elements["totals"] = True # type: ignore 2630 2631 if not (grouping_sets or rollup or cube or totals): 2632 break 2633 2634 return self.expression(exp.Group, **elements) # type: ignore 2635 2636 def _parse_grouping_sets(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]: 2637 if not self._match(TokenType.GROUPING_SETS): 2638 return None 2639 2640 return self._parse_wrapped_csv(self._parse_grouping_set) 2641 2642 def _parse_grouping_set(self) -> t.Optional[exp.Expression]: 2643 if self._match(TokenType.L_PAREN): 2644 grouping_set = self._parse_csv(self._parse_column) 2645 self._match_r_paren() 2646 return self.expression(exp.Tuple, expressions=grouping_set) 2647 2648 return self._parse_column() 2649 2650 def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]: 2651 if not skip_having_token and not self._match(TokenType.HAVING): 2652 return None 2653 return self.expression(exp.Having, this=self._parse_conjunction()) 2654 2655 def _parse_qualify(self) -> t.Optional[exp.Qualify]: 2656 if not self._match(TokenType.QUALIFY): 2657 return None 2658 return self.expression(exp.Qualify, this=self._parse_conjunction()) 2659 2660 def _parse_order( 2661 self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False 2662 ) -> t.Optional[exp.Expression]: 2663 if not skip_order_token and not self._match(TokenType.ORDER_BY): 2664 return this 2665 2666 return self.expression( 2667 exp.Order, this=this, expressions=self._parse_csv(self._parse_ordered) 2668 ) 2669 2670 def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> 
t.Optional[E]: 2671 if not self._match(token): 2672 return None 2673 return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered)) 2674 2675 def _parse_ordered(self) -> exp.Ordered: 2676 this = self._parse_conjunction() 2677 self._match(TokenType.ASC) 2678 2679 is_desc = self._match(TokenType.DESC) 2680 is_nulls_first = self._match_text_seq("NULLS", "FIRST") 2681 is_nulls_last = self._match_text_seq("NULLS", "LAST") 2682 desc = is_desc or False 2683 asc = not desc 2684 nulls_first = is_nulls_first or False 2685 explicitly_null_ordered = is_nulls_first or is_nulls_last 2686 2687 if ( 2688 not explicitly_null_ordered 2689 and ( 2690 (asc and self.NULL_ORDERING == "nulls_are_small") 2691 or (desc and self.NULL_ORDERING != "nulls_are_small") 2692 ) 2693 and self.NULL_ORDERING != "nulls_are_last" 2694 ): 2695 nulls_first = True 2696 2697 return self.expression(exp.Ordered, this=this, desc=desc, nulls_first=nulls_first) 2698 2699 def _parse_limit( 2700 self, this: t.Optional[exp.Expression] = None, top: bool = False 2701 ) -> t.Optional[exp.Expression]: 2702 if self._match(TokenType.TOP if top else TokenType.LIMIT): 2703 limit_paren = self._match(TokenType.L_PAREN) 2704 expression = self._parse_number() if top else self._parse_term() 2705 2706 if self._match(TokenType.COMMA): 2707 offset = expression 2708 expression = self._parse_term() 2709 else: 2710 offset = None 2711 2712 limit_exp = self.expression(exp.Limit, this=this, expression=expression, offset=offset) 2713 2714 if limit_paren: 2715 self._match_r_paren() 2716 2717 return limit_exp 2718 2719 if self._match(TokenType.FETCH): 2720 direction = self._match_set((TokenType.FIRST, TokenType.NEXT)) 2721 direction = self._prev.text if direction else "FIRST" 2722 2723 count = self._parse_number() 2724 percent = self._match(TokenType.PERCENT) 2725 2726 self._match_set((TokenType.ROW, TokenType.ROWS)) 2727 2728 only = self._match_text_seq("ONLY") 2729 with_ties = self._match_text_seq("WITH", "TIES") 

            # ONLY and WITH TIES are mutually exclusive FETCH terminators.
            if only and with_ties:
                self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause")

            return self.expression(
                exp.Fetch,
                direction=direction,
                count=count,
                percent=percent,
                with_ties=with_ties,
            )

        return this

    def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        """Parse OFFSET n [ROW|ROWS]; returns `this` unchanged when OFFSET is absent."""
        if not self._match(TokenType.OFFSET):
            return this

        count = self._parse_number()
        self._match_set((TokenType.ROW, TokenType.ROWS))
        return self.expression(exp.Offset, this=this, expression=count)

    def _parse_locks(self) -> t.List[exp.Lock]:
        """Parse trailing locking clauses: FOR UPDATE / FOR SHARE / LOCK IN SHARE MODE,
        each with optional OF <tables> and NOWAIT / WAIT n / SKIP LOCKED."""
        locks = []
        while True:
            if self._match_text_seq("FOR", "UPDATE"):
                update = True
            elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq(
                "LOCK", "IN", "SHARE", "MODE"
            ):
                update = False
            else:
                break

            expressions = None
            if self._match_text_seq("OF"):
                expressions = self._parse_csv(lambda: self._parse_table(schema=True))

            # wait: True = NOWAIT, False = SKIP LOCKED, expression = WAIT <n>.
            wait: t.Optional[bool | exp.Expression] = None
            if self._match_text_seq("NOWAIT"):
                wait = True
            elif self._match_text_seq("WAIT"):
                wait = self._parse_primary()
            elif self._match_text_seq("SKIP", "LOCKED"):
                wait = False

            locks.append(
                self.expression(exp.Lock, update=update, expressions=expressions, wait=wait)
            )

        return locks

    def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse trailing UNION/EXCEPT/INTERSECT chains onto `this` (right-recursive)."""
        if not self._match_set(self.SET_OPERATIONS):
            return this

        token_type = self._prev.token_type

        if token_type == TokenType.UNION:
            expression = exp.Union
        elif token_type == TokenType.EXCEPT:
            expression = exp.Except
        else:
            expression = exp.Intersect

        return self.expression(
            expression,
            this=this,
            # DISTINCT is the default; an explicit ALL makes it non-distinct.
            distinct=self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL),
            expression=self._parse_set_operations(self._parse_select(nested=True)),
        )

    def _parse_expression(self) -> t.Optional[exp.Expression]:
        """Parse a projection-level expression with an optional alias."""
        return self._parse_alias(self._parse_conjunction())

    def _parse_conjunction(self) -> t.Optional[exp.Expression]:
        """Precedence level: AND/OR-style conjunction operators."""
        return self._parse_tokens(self._parse_equality, self.CONJUNCTION)

    def _parse_equality(self) -> t.Optional[exp.Expression]:
        """Precedence level: equality operators."""
        return self._parse_tokens(self._parse_comparison, self.EQUALITY)

    def _parse_comparison(self) -> t.Optional[exp.Expression]:
        """Precedence level: comparison operators."""
        return self._parse_tokens(self._parse_range, self.COMPARISON)

    def _parse_range(self) -> t.Optional[exp.Expression]:
        """Parse range-style predicates (BETWEEN, IN, LIKE, ...) plus ISNULL/NOTNULL and IS."""
        this = self._parse_bitwise()
        negate = self._match(TokenType.NOT)

        if self._match_set(self.RANGE_PARSERS):
            expression = self.RANGE_PARSERS[self._prev.token_type](self, this)
            if not expression:
                return this

            this = expression
        elif self._match(TokenType.ISNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())

        # Postgres supports ISNULL and NOTNULL for conditions.
        # https://blog.andreiavram.ro/postgresql-null-composite-type/
        if self._match(TokenType.NOTNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())
            this = self.expression(exp.Not, this=this)

        if negate:
            this = self.expression(exp.Not, this=this)

        if self._match(TokenType.IS):
            this = self._parse_is(this)

        return this

    def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse the tail of IS [NOT] {DISTINCT FROM expr | NULL | TRUE | FALSE}."""
        # IS itself was already consumed; remember its position for backtracking.
        index = self._index - 1
        negate = self._match(TokenType.NOT)

        if self._match_text_seq("DISTINCT", "FROM"):
            klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ
            return self.expression(klass, this=this, expression=self._parse_expression())

        expression = self._parse_null() or self._parse_boolean()
        if not expression:
            self._retreat(index)
            return None

        this = self.expression(exp.Is, this=this, expression=expression)
        return self.expression(exp.Not, this=this) if negate else this

    def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In:
        """Parse the tail of IN: UNNEST(...), a (list|subquery), or a bare field."""
        unnest = self._parse_unnest(with_alias=False)
        if unnest:
            this = self.expression(exp.In, this=this, unnest=unnest)
        elif self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias))

            # A lone subquery becomes the "query" arg; otherwise it's a value list.
            if len(expressions) == 1 and isinstance(expressions[0], exp.Subqueryable):
                this = self.expression(exp.In, this=this, query=expressions[0])
            else:
                this = self.expression(exp.In, this=this, expressions=expressions)

            self._match_r_paren(this)
        else:
            this = self.expression(exp.In, this=this, field=self._parse_field())

        return this

    def _parse_between(self, this: exp.Expression) -> exp.Between:
        """Parse the tail of BETWEEN: low AND high."""
        low = self._parse_bitwise()
        self._match(TokenType.AND)
        high = self._parse_bitwise()
        return self.expression(exp.Between, this=this, low=low, high=high)

    def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse an optional ESCAPE '<char>' suffix (used after LIKE-style operators)."""
        if not self._match(TokenType.ESCAPE):
            return this
        return self.expression(exp.Escape, this=this, expression=self._parse_string())

    def _parse_interval(self) -> t.Optional[exp.Interval]:
        """Parse an INTERVAL literal, canonicalizing to INTERVAL '<n>' <unit> form."""
        if not self._match(TokenType.INTERVAL):
            return None

        if self._match(TokenType.STRING, advance=False):
            this = self._parse_primary()
        else:
            this = self._parse_term()

        unit = self._parse_function() or self._parse_var()

        # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse
        # each INTERVAL expression into this canonical form so it's easy to transpile
        if this and this.is_number:
            this = exp.Literal.string(this.name)
        elif this and this.is_string:
            parts = this.name.split()

            if len(parts) == 2:
                if unit:
                    # this is not actually a unit, it's something else
                    unit = None
                    self._retreat(self._index - 1)
                else:
                    this = exp.Literal.string(parts[0])
                    unit = self.expression(exp.Var, this=parts[1])

        return self.expression(exp.Interval, this=this, unit=unit)

    def _parse_bitwise(self) -> t.Optional[exp.Expression]:
        """Precedence level: bitwise operators, plus << and >> built from LT/GT pairs."""
        this = self._parse_term()

        while True:
            if self._match_set(self.BITWISE):
                this = self.expression(
                    self.BITWISE[self._prev.token_type], this=this, expression=self._parse_term()
                )
            elif self._match_pair(TokenType.LT, TokenType.LT):
                this = self.expression(
                    exp.BitwiseLeftShift, this=this, expression=self._parse_term()
                )
            elif self._match_pair(TokenType.GT, TokenType.GT):
                this = self.expression(
                    exp.BitwiseRightShift, this=this, expression=self._parse_term()
                )
            else:
                break

        return this

    def _parse_term(self) -> t.Optional[exp.Expression]:
        """Precedence level: additive-style TERM operators."""
        return self._parse_tokens(self._parse_factor, self.TERM)

    def _parse_factor(self) -> t.Optional[exp.Expression]:
        """Precedence level: multiplicative-style FACTOR operators."""
        return self._parse_tokens(self._parse_unary, self.FACTOR)

    def _parse_unary(self) -> t.Optional[exp.Expression]:
        """Parse prefix unary operators, then fall through to typed expressions."""
        if self._match_set(self.UNARY_PARSERS):
            return self.UNARY_PARSERS[self._prev.token_type](self)
        return self._parse_at_time_zone(self._parse_type())

    def _parse_type(self) -> t.Optional[exp.Expression]:
        """Parse `TYPE literal`-style casts (e.g. DATE '2020-01-01') or a plain column."""
        interval = self._parse_interval()
        if interval:
            return interval

        index = self._index
        data_type = self._parse_types(check_func=True)
        this = self._parse_column()

        if data_type:
            if isinstance(this, exp.Literal):
                parser = self.TYPE_LITERAL_PARSERS.get(data_type.this)
                if parser:
                    return parser(self, this, data_type)
                return self.expression(exp.Cast, this=this, to=data_type)
            if not data_type.expressions:
                # Bare type name not followed by a literal: re-parse as a column.
                self._retreat(index)
                return self._parse_column()
            return self._parse_column_ops(data_type)

        return this

    def _parse_type_size(self) -> t.Optional[exp.DataTypeSize]:
        """Parse one size argument of a parameterized type, e.g. VARCHAR(10 BYTE)."""
        this = self._parse_type()
        if not this:
            return None

        return self.expression(
            exp.DataTypeSize, this=this, expression=self._parse_var(any_token=True)
        )

    def _parse_types(
        self, check_func: bool = False, schema: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse a data type, including nested (ARRAY/MAP/STRUCT), parameterized and
        bracketed array forms; `check_func` guards against misreading function calls."""
        index = self._index

        # Teradata system UDT prefix.
        prefix = self._match_text_seq("SYSUDTLIB", ".")

        if not self._match_set(self.TYPE_TOKENS):
            return None

        type_token = self._prev.token_type

        if type_token == TokenType.PSEUDO_TYPE:
            return self.expression(exp.PseudoType, this=self._prev.text)

        nested = type_token in self.NESTED_TYPE_TOKENS
        is_struct = type_token == TokenType.STRUCT
        expressions = None
        maybe_func = False

        if self._match(TokenType.L_PAREN):
            if is_struct:
                expressions = self._parse_csv(self._parse_struct_types)
            elif nested:
                expressions = self._parse_csv(
                    lambda: self._parse_types(check_func=check_func, schema=schema)
                )
            elif type_token in self.ENUM_TYPE_TOKENS:
                expressions = self._parse_csv(self._parse_primary)
            else:
                expressions = self._parse_csv(self._parse_type_size)

            if not expressions or not self._match(TokenType.R_PAREN):
                # Not a parameterized type after all; backtrack fully.
                self._retreat(index)
                return None

            maybe_func = True

        # Postfix [] array syntax, possibly repeated: INT[][] etc.
        if self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET):
            this = exp.DataType(
                this=exp.DataType.Type.ARRAY,
                expressions=[exp.DataType.build(type_token.value, expressions=expressions)],
                nested=True,
            )

            while self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET):
                this = exp.DataType(this=exp.DataType.Type.ARRAY, expressions=[this], nested=True)

            return this

        if self._match(TokenType.L_BRACKET):
            self._retreat(index)
            return None

        values: t.Optional[t.List[t.Optional[exp.Expression]]] = None
        if nested and self._match(TokenType.LT):
            # Angle-bracket nested types: ARRAY<...>, MAP<...>, STRUCT<...>.
            if is_struct:
                expressions = self._parse_csv(self._parse_struct_types)
            else:
                expressions = self._parse_csv(
                    lambda: self._parse_types(check_func=check_func, schema=schema)
                )

            if not self._match(TokenType.GT):
                self.raise_error("Expecting >")

            if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)):
                values = self._parse_csv(self._parse_conjunction)
                self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN))

        value: t.Optional[exp.Expression] = None
        if type_token in self.TIMESTAMPS:
            if self._match_text_seq("WITH", "TIME", "ZONE"):
                maybe_func = False
                value = exp.DataType(this=exp.DataType.Type.TIMESTAMPTZ, expressions=expressions)
            elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"):
                maybe_func = False
                value = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions)
            elif self._match_text_seq("WITHOUT", "TIME", "ZONE"):
                maybe_func = False
        elif type_token == TokenType.INTERVAL:
            unit = self._parse_var()

            if not unit:
                value = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL)
            else:
                value = self.expression(exp.Interval, unit=unit)

        if maybe_func and check_func:
            # TYPE(...) followed by a string literal is a cast-like literal, not a
            # function call; peek ahead to disambiguate, then restore position.
            index2 = self._index
            peek = self._parse_string()

            if not peek:
                self._retreat(index)
                return None

            self._retreat(index2)

        if value:
            return value

        return exp.DataType(
            this=exp.DataType.Type[type_token.value.upper()],
            expressions=expressions,
            nested=nested,
            values=values,
            prefix=prefix,
        )

    def _parse_struct_types(self) -> t.Optional[exp.Expression]:
        """Parse one STRUCT field: name[:] type, as a column definition."""
        this = self._parse_type() or self._parse_id_var()
        self._match(TokenType.COLON)
        return self._parse_column_def(this)

    def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse an optional AT TIME ZONE suffix on `this`."""
        if not self._match_text_seq("AT", "TIME", "ZONE"):
            return this
        return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary())

    def _parse_column(self) -> t.Optional[exp.Expression]:
        """Parse a column reference, promoting a bare identifier to a Column node."""
        this = self._parse_field()
        if isinstance(this, exp.Identifier):
            this = self.expression(exp.Column, this=this)
        elif not this:
            return self._parse_bracket(this)
        return self._parse_column_ops(this)

    def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Apply postfix column operators (dots, ::casts, JSON arrows, brackets) to `this`."""
        this = self._parse_bracket(this)

        while self._match_set(self.COLUMN_OPERATORS):
            op_token = self._prev.token_type
            op = self.COLUMN_OPERATORS.get(op_token)

            if op_token == TokenType.DCOLON:
                field = self._parse_types()
                if not field:
                    self.raise_error("Expected type")
            elif op and self._curr:
                self._advance()
                value = self._prev.text
                field = (
exp.Literal.number(value) 3121 if self._prev.token_type == TokenType.NUMBER 3122 else exp.Literal.string(value) 3123 ) 3124 else: 3125 field = self._parse_field(anonymous_func=True, any_token=True) 3126 3127 if isinstance(field, exp.Func): 3128 # bigquery allows function calls like x.y.count(...) 3129 # SAFE.SUBSTR(...) 3130 # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules 3131 this = self._replace_columns_with_dots(this) 3132 3133 if op: 3134 this = op(self, this, field) 3135 elif isinstance(this, exp.Column) and not this.args.get("catalog"): 3136 this = self.expression( 3137 exp.Column, 3138 this=field, 3139 table=this.this, 3140 db=this.args.get("table"), 3141 catalog=this.args.get("db"), 3142 ) 3143 else: 3144 this = self.expression(exp.Dot, this=this, expression=field) 3145 this = self._parse_bracket(this) 3146 return this 3147 3148 def _parse_primary(self) -> t.Optional[exp.Expression]: 3149 if self._match_set(self.PRIMARY_PARSERS): 3150 token_type = self._prev.token_type 3151 primary = self.PRIMARY_PARSERS[token_type](self, self._prev) 3152 3153 if token_type == TokenType.STRING: 3154 expressions = [primary] 3155 while self._match(TokenType.STRING): 3156 expressions.append(exp.Literal.string(self._prev.text)) 3157 3158 if len(expressions) > 1: 3159 return self.expression(exp.Concat, expressions=expressions) 3160 3161 return primary 3162 3163 if self._match_pair(TokenType.DOT, TokenType.NUMBER): 3164 return exp.Literal.number(f"0.{self._prev.text}") 3165 3166 if self._match(TokenType.L_PAREN): 3167 comments = self._prev_comments 3168 query = self._parse_select() 3169 3170 if query: 3171 expressions = [query] 3172 else: 3173 expressions = self._parse_csv(self._parse_expression) 3174 3175 this = self._parse_query_modifiers(seq_get(expressions, 0)) 3176 3177 if isinstance(this, exp.Subqueryable): 3178 this = self._parse_set_operations( 3179 self._parse_subquery(this=this, parse_alias=False) 3180 ) 3181 elif 
    def _parse_field(
        self,
        any_token: bool = False,
        tokens: t.Optional[t.Collection[TokenType]] = None,
        anonymous_func: bool = False,
    ) -> t.Optional[exp.Expression]:
        # A field is the first of: primary (literal/paren), function call, identifier.
        return (
            self._parse_primary()
            or self._parse_function(anonymous=anonymous_func)
            or self._parse_id_var(any_token=any_token, tokens=tokens)
        )

    def _parse_function(
        self,
        functions: t.Optional[t.Dict[str, t.Callable]] = None,
        anonymous: bool = False,
        optional_parens: bool = True,
    ) -> t.Optional[exp.Expression]:
        """Parse a function call.

        Args:
            functions: name -> builder map to use instead of self.FUNCTIONS.
            anonymous: when True, always build an exp.Anonymous instead of a known node.
            optional_parens: allow paren-less functions (e.g. CURRENT_DATE).
        """
        if not self._curr:
            return None

        token_type = self._curr.token_type

        if optional_parens and self._match_set(self.NO_PAREN_FUNCTION_PARSERS):
            return self.NO_PAREN_FUNCTION_PARSERS[token_type](self)

        if not self._next or self._next.token_type != TokenType.L_PAREN:
            # No "(": only paren-less builtins like CURRENT_TIMESTAMP qualify.
            if optional_parens and token_type in self.NO_PAREN_FUNCTIONS:
                self._advance()
                return self.expression(self.NO_PAREN_FUNCTIONS[token_type])

            return None

        if token_type not in self.FUNC_TOKENS:
            return None

        this = self._curr.text
        upper = this.upper()
        self._advance(2)  # consume the function name and "("

        parser = self.FUNCTION_PARSERS.get(upper)

        if parser and not anonymous:
            this = parser(self)
        else:
            subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type)

            if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH):
                # e.g. EXISTS(SELECT ...) — the argument is a full subquery.
                this = self.expression(subquery_predicate, this=self._parse_select())
                self._match_r_paren()
                return this

            if functions is None:
                functions = self.FUNCTIONS

            function = functions.get(upper)

            alias = upper in self.FUNCTIONS_WITH_ALIASED_ARGS
            args = self._parse_csv(lambda: self._parse_lambda(alias=alias))

            if function and not anonymous:
                this = self.validate_expression(function(args), args)
            else:
                # Unknown function name: keep it as-is with its raw argument list.
                this = self.expression(exp.Anonymous, this=this, expressions=args)

        self._match_r_paren(this)
        return self._parse_window(this)

    def _parse_function_parameter(self) -> t.Optional[exp.Expression]:
        # A UDF parameter: "name type [constraints]".
        return self._parse_column_def(self._parse_id_var())

    def _parse_user_defined_function(
        self, kind: t.Optional[TokenType] = None
    ) -> t.Optional[exp.Expression]:
        """Parse a (possibly dotted) UDF name with an optional parenthesized parameter list."""
        this = self._parse_id_var()

        while self._match(TokenType.DOT):
            this = self.expression(exp.Dot, this=this, expression=self._parse_id_var())

        if not self._match(TokenType.L_PAREN):
            return this

        expressions = self._parse_csv(self._parse_function_parameter)
        self._match_r_paren()
        return self.expression(
            exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True
        )

    def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier:
        # e.g. MySQL charset introducers (_utf8'abc'); without a following literal
        # the token is just an identifier.
        literal = self._parse_primary()
        if literal:
            return self.expression(exp.Introducer, this=token.text, expression=literal)

        return self.expression(exp.Identifier, this=token.text)

    def _parse_session_parameter(self) -> exp.SessionParameter:
        # "name" or "kind.name" session parameter reference.
        kind = None
        this = self._parse_id_var() or self._parse_primary()

        if this and self._match(TokenType.DOT):
            kind = this.name
            this = self._parse_var() or self._parse_primary()

        return self.expression(exp.SessionParameter, this=this, kind=kind)

    def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]:
        """Parse a lambda argument ("(x, y) -> expr" or "x -> expr"), falling back to a
        DISTINCT list or a plain select/expression when no lambda arrow follows."""
        index = self._index

        if self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(self._parse_id_var)

            if not self._match(TokenType.R_PAREN):
                self._retreat(index)
        else:
            expressions = [self._parse_id_var()]

        if self._match_set(self.LAMBDAS):
            return self.LAMBDAS[self._prev.token_type](self, expressions)

        # Not a lambda: rewind and parse as a regular argument.
        self._retreat(index)

        this: t.Optional[exp.Expression]

        if self._match(TokenType.DISTINCT):
            this = self.expression(
                exp.Distinct, expressions=self._parse_csv(self._parse_conjunction)
            )
        else:
            this = self._parse_select_or_expression(alias=alias)

        if isinstance(this, exp.EQ):
            # Treat the left side of "name = value" arguments as a variable, not a column.
            left = this.this
            if isinstance(left, exp.Column):
                left.replace(exp.var(left.text("this")))

        return self._parse_limit(self._parse_order(self._parse_respect_or_ignore_nulls(this)))

    def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        """Parse an optional parenthesized schema (column defs / constraints) attached to `this`."""
        index = self._index

        if not self.errors:
            # Speculatively try a nested SELECT first; any errors from the attempt are
            # discarded and the index rewound.
            try:
                if self._parse_select(nested=True):
                    return this
            except ParseError:
                pass
            finally:
                self.errors.clear()
                self._retreat(index)

        if not self._match(TokenType.L_PAREN):
            return this

        args = self._parse_csv(
            lambda: self._parse_constraint()
            or self._parse_column_def(self._parse_field(any_token=True))
        )

        self._match_r_paren()
        return self.expression(exp.Schema, this=this, expressions=args)

    def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse "name [type] [constraints...]", returning `this` unchanged when neither follows."""
        # column defs are not really columns, they're identifiers
        if isinstance(this, exp.Column):
            this = this.this

        kind = self._parse_types(schema=True)

        if self._match_text_seq("FOR", "ORDINALITY"):
            return self.expression(exp.ColumnDef, this=this, ordinality=True)

        constraints = []
        while True:
            constraint = self._parse_column_constraint()
            if not constraint:
                break
            constraints.append(constraint)

        if not kind and not constraints:
            return this

        return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints)

    def _parse_auto_increment(
        self,
    ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint:
        """Parse AUTO_INCREMENT, optionally with (start, increment) or START ... INCREMENT ..."""
        start = None
        increment = None

        if self._match(TokenType.L_PAREN, advance=False):
            args = self._parse_wrapped_csv(self._parse_bitwise)
            start = seq_get(args, 0)
            increment = seq_get(args, 1)
        elif self._match_text_seq("START"):
            start = self._parse_bitwise()
            self._match_text_seq("INCREMENT")
            increment = self._parse_bitwise()

        if start and increment:
            return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment)

        return exp.AutoIncrementColumnConstraint()

    def _parse_compress(self) -> exp.CompressColumnConstraint:
        # COMPRESS with either a wrapped list of values or a single expression.
        if self._match(TokenType.L_PAREN, advance=False):
            return self.expression(
                exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise)
            )

        return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise())

    def _parse_generated_as_identity(self) -> exp.GeneratedAsIdentityColumnConstraint:
        """Parse GENERATED {ALWAYS | BY DEFAULT [ON NULL]} AS [IDENTITY] [(options)]."""
        if self._match_text_seq("BY", "DEFAULT"):
            on_null = self._match_pair(TokenType.ON, TokenType.NULL)
            this = self.expression(
                exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null
            )
        else:
            self._match_text_seq("ALWAYS")
            this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True)

        self._match(TokenType.ALIAS)
        identity = self._match_text_seq("IDENTITY")

        if self._match(TokenType.L_PAREN):
            if self._match_text_seq("START", "WITH"):
                this.set("start", self._parse_bitwise())
            if self._match_text_seq("INCREMENT", "BY"):
                this.set("increment", self._parse_bitwise())
            if self._match_text_seq("MINVALUE"):
                this.set("minvalue", self._parse_bitwise())
            if self._match_text_seq("MAXVALUE"):
                this.set("maxvalue", self._parse_bitwise())

            if self._match_text_seq("CYCLE"):
                this.set("cycle", True)
            elif self._match_text_seq("NO", "CYCLE"):
                this.set("cycle", False)

            if not identity:
                # GENERATED ... AS (expression) — a computed column rather than an identity.
                this.set("expression", self._parse_bitwise())

            self._match_r_paren()

        return this
    def _parse_inline(self) -> exp.InlineLengthColumnConstraint:
        # INLINE [LENGTH] <expr> (Teradata-style inline length constraint).
        self._match_text_seq("LENGTH")
        return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise())

    def _parse_not_constraint(
        self,
    ) -> t.Optional[exp.NotNullColumnConstraint | exp.CaseSpecificColumnConstraint]:
        # NOT NULL or NOT CASESPECIFIC; anything else after NOT is not a constraint here.
        if self._match_text_seq("NULL"):
            return self.expression(exp.NotNullColumnConstraint)
        if self._match_text_seq("CASESPECIFIC"):
            return self.expression(exp.CaseSpecificColumnConstraint, not_=True)
        return None

    def _parse_column_constraint(self) -> t.Optional[exp.Expression]:
        # An optionally named ([CONSTRAINT name]) column constraint, dispatched
        # through CONSTRAINT_PARSERS by keyword.
        if self._match(TokenType.CONSTRAINT):
            this = self._parse_id_var()
        else:
            this = None

        if self._match_texts(self.CONSTRAINT_PARSERS):
            return self.expression(
                exp.ColumnConstraint,
                this=this,
                kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self),
            )

        return this

    def _parse_constraint(self) -> t.Optional[exp.Expression]:
        # A table-level constraint: either unnamed, or CONSTRAINT <name> followed by
        # one or more constraint bodies.
        if not self._match(TokenType.CONSTRAINT):
            return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS)

        this = self._parse_id_var()
        expressions = []

        while True:
            constraint = self._parse_unnamed_constraint() or self._parse_function()
            if not constraint:
                break
            expressions.append(constraint)

        return self.expression(exp.Constraint, this=this, expressions=expressions)

    def _parse_unnamed_constraint(
        self, constraints: t.Optional[t.Collection[str]] = None
    ) -> t.Optional[exp.Expression]:
        # Dispatch on the constraint keyword; `constraints` narrows which keywords match.
        if not self._match_texts(constraints or self.CONSTRAINT_PARSERS):
            return None

        constraint = self._prev.text.upper()
        if constraint not in self.CONSTRAINT_PARSERS:
            self.raise_error(f"No parser found for schema constraint {constraint}.")

        return self.CONSTRAINT_PARSERS[constraint](self)

    def _parse_unique(self) -> exp.UniqueColumnConstraint:
        # UNIQUE [KEY] [(columns...)].
        self._match_text_seq("KEY")
        return self.expression(
            exp.UniqueColumnConstraint, this=self._parse_schema(self._parse_id_var(any_token=False))
        )

    def _parse_key_constraint_options(self) -> t.List[str]:
        """Collect trailing key-constraint option strings (ON DELETE/UPDATE actions, DEFERRABLE, ...)."""
        options = []
        while True:
            if not self._curr:
                break

            if self._match(TokenType.ON):
                action = None
                on = self._advance_any() and self._prev.text  # DELETE / UPDATE keyword text

                if self._match_text_seq("NO", "ACTION"):
                    action = "NO ACTION"
                elif self._match_text_seq("CASCADE"):
                    action = "CASCADE"
                elif self._match_pair(TokenType.SET, TokenType.NULL):
                    action = "SET NULL"
                elif self._match_pair(TokenType.SET, TokenType.DEFAULT):
                    action = "SET DEFAULT"
                else:
                    self.raise_error("Invalid key constraint")

                options.append(f"ON {on} {action}")
            elif self._match_text_seq("NOT", "ENFORCED"):
                options.append("NOT ENFORCED")
            elif self._match_text_seq("DEFERRABLE"):
                options.append("DEFERRABLE")
            elif self._match_text_seq("INITIALLY", "DEFERRED"):
                options.append("INITIALLY DEFERRED")
            elif self._match_text_seq("NORELY"):
                options.append("NORELY")
            elif self._match_text_seq("MATCH", "FULL"):
                options.append("MATCH FULL")
            else:
                break

        return options

    def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]:
        # REFERENCES <table> [(columns)] [options]; `match=False` when the REFERENCES
        # keyword was already consumed by the caller.
        if match and not self._match(TokenType.REFERENCES):
            return None

        expressions = None
        this = self._parse_id_var()

        if self._match(TokenType.L_PAREN, advance=False):
            expressions = self._parse_wrapped_id_vars()

        options = self._parse_key_constraint_options()
        return self.expression(exp.Reference, this=this, expressions=expressions, options=options)

    def _parse_foreign_key(self) -> exp.ForeignKey:
        """Parse FOREIGN KEY (cols) [REFERENCES ...] [ON DELETE/UPDATE <action> ...]."""
        expressions = self._parse_wrapped_id_vars()
        reference = self._parse_references()
        options = {}

        while self._match(TokenType.ON):
            if not self._match_set((TokenType.DELETE, TokenType.UPDATE)):
                self.raise_error("Expected DELETE or UPDATE")

            kind = self._prev.text.lower()

            if self._match_text_seq("NO", "ACTION"):
                action = "NO ACTION"
            elif self._match(TokenType.SET):
                self._match_set((TokenType.NULL, TokenType.DEFAULT))
                action = "SET " + self._prev.text.upper()
            else:
                self._advance()
                action = self._prev.text.upper()

            options[kind] = action

        return self.expression(
            exp.ForeignKey, expressions=expressions, reference=reference, **options  # type: ignore
        )

    def _parse_primary_key(
        self, wrapped_optional: bool = False, in_props: bool = False
    ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey:
        # Column-level (no "(cols)") vs table-level PRIMARY KEY.
        desc = (
            self._match_set((TokenType.ASC, TokenType.DESC))
            and self._prev.token_type == TokenType.DESC
        )

        if not in_props and not self._match(TokenType.L_PAREN, advance=False):
            return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc)

        expressions = self._parse_wrapped_csv(self._parse_field, optional=wrapped_optional)
        options = self._parse_key_constraint_options()
        return self.expression(exp.PrimaryKey, expressions=expressions, options=options)

    def _parse_bracket(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse [...] subscripts / array literals and {...} struct literals following `this`."""
        if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)):
            return this

        bracket_kind = self._prev.token_type

        if self._match(TokenType.COLON):
            # Leading-colon slice, e.g. x[:2].
            expressions: t.List[t.Optional[exp.Expression]] = [
                self.expression(exp.Slice, expression=self._parse_conjunction())
            ]
        else:
            expressions = self._parse_csv(lambda: self._parse_slice(self._parse_conjunction()))

        # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs
        if bracket_kind == TokenType.L_BRACE:
            this = self.expression(exp.Struct, expressions=expressions)
        elif not this or this.name.upper() == "ARRAY":
            this = self.expression(exp.Array, expressions=expressions)
        else:
            # Subscript: normalize indices by the dialect's INDEX_OFFSET.
            expressions = apply_index_offset(this, expressions, -self.INDEX_OFFSET)
            this = self.expression(exp.Bracket, this=this, expressions=expressions)

        if not self._match(TokenType.R_BRACKET) and bracket_kind == TokenType.L_BRACKET:
            self.raise_error("Expected ]")
        elif not self._match(TokenType.R_BRACE) and bracket_kind == TokenType.L_BRACE:
            self.raise_error("Expected }")

        self._add_comments(this)
        # Chained subscripts, e.g. x[0][1].
        return self._parse_bracket(this)

    def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        # "lo:hi" inside brackets; plain expression otherwise.
        if self._match(TokenType.COLON):
            return self.expression(exp.Slice, this=this, expression=self._parse_conjunction())
        return this

    def _parse_case(self) -> t.Optional[exp.Expression]:
        """Parse CASE [operand] WHEN ... THEN ... [ELSE ...] END (CASE already consumed)."""
        ifs = []
        default = None

        expression = self._parse_conjunction()

        while self._match(TokenType.WHEN):
            this = self._parse_conjunction()
            self._match(TokenType.THEN)
            then = self._parse_conjunction()
            ifs.append(self.expression(exp.If, this=this, true=then))

        if self._match(TokenType.ELSE):
            default = self._parse_conjunction()

        if not self._match(TokenType.END):
            self.raise_error("Expected END after CASE", self._prev)

        return self._parse_window(
            self.expression(exp.Case, this=expression, ifs=ifs, default=default)
        )

    def _parse_if(self) -> t.Optional[exp.Expression]:
        # Both forms: IF(cond, true, false) and IF cond THEN ... [ELSE ...] END.
        if self._match(TokenType.L_PAREN):
            args = self._parse_csv(self._parse_conjunction)
            this = self.validate_expression(exp.If.from_arg_list(args), args)
            self._match_r_paren()
        else:
            index = self._index - 1
            condition = self._parse_conjunction()

            if not condition:
                self._retreat(index)
                return None

            self._match(TokenType.THEN)
            true = self._parse_conjunction()
            false = self._parse_conjunction() if self._match(TokenType.ELSE) else None
            self._match(TokenType.END)
            this = self.expression(exp.If, this=condition, true=true, false=false)

        return self._parse_window(this)
    def _parse_extract(self) -> exp.Extract:
        # EXTRACT(part FROM expr); a comma is tolerated in place of FROM.
        this = self._parse_function() or self._parse_var() or self._parse_type()

        if self._match(TokenType.FROM):
            return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

        if not self._match(TokenType.COMMA):
            self.raise_error("Expected FROM or comma after EXTRACT", self._prev)

        return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

    def _parse_cast(self, strict: bool) -> exp.Expression:
        """Parse the interior of CAST(expr AS type [FORMAT fmt]); strict=False yields TryCast."""
        this = self._parse_conjunction()

        if not self._match(TokenType.ALIAS):
            if self._match(TokenType.COMMA):
                # CAST(expr, 'type string') variant.
                return self.expression(
                    exp.CastToStrType, this=this, expression=self._parse_string()
                )
            else:
                self.raise_error("Expected AS after CAST")

        to = self._parse_types()

        if not to:
            self.raise_error("Expected TYPE after CAST")
        elif to.this == exp.DataType.Type.CHAR:
            if self._match(TokenType.CHARACTER_SET):
                to = self.expression(exp.CharacterSet, this=self._parse_var_or_string())
        elif to.this in exp.DataType.TEMPORAL_TYPES and self._match(TokenType.FORMAT):
            # CAST(expr AS DATE/TIMESTAMP FORMAT 'fmt') becomes StrToDate/StrToTime with
            # the format string translated into the dialect-neutral time mapping.
            fmt = self._parse_string()

            return self.expression(
                exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime,
                this=this,
                format=exp.Literal.string(
                    format_time(
                        fmt.this if fmt else "",
                        self.FORMAT_MAPPING or self.TIME_MAPPING,
                        self.FORMAT_TRIE or self.TIME_TRIE,
                    )
                ),
            )

        return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to)

    def _parse_concat(self) -> t.Optional[exp.Expression]:
        args = self._parse_csv(self._parse_conjunction)
        if self.CONCAT_NULL_OUTPUTS_STRING:
            # Emulate "NULL input yields ''" semantics by coalescing each argument.
            args = [
                exp.func("COALESCE", exp.cast(arg, "text"), exp.Literal.string(""))
                for arg in args
                if arg
            ]

        # Some dialects (e.g. Trino) don't allow a single-argument CONCAT call, so when
        # we find such a call we replace it with its argument.
        if len(args) == 1:
            return args[0]

        return self.expression(
            exp.Concat if self.STRICT_STRING_CONCAT else exp.SafeConcat, expressions=args
        )

    def _parse_string_agg(self) -> exp.Expression:
        """Parse STRING_AGG / GROUP_CONCAT argument forms, including WITHIN GROUP ordering."""
        if self._match(TokenType.DISTINCT):
            args: t.List[t.Optional[exp.Expression]] = [
                self.expression(exp.Distinct, expressions=[self._parse_conjunction()])
            ]
            if self._match(TokenType.COMMA):
                args.extend(self._parse_csv(self._parse_conjunction))
        else:
            args = self._parse_csv(self._parse_conjunction)

        index = self._index
        if not self._match(TokenType.R_PAREN):
            # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]])
            return self.expression(
                exp.GroupConcat,
                this=seq_get(args, 0),
                separator=self._parse_order(this=seq_get(args, 1)),
            )

        # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]).
        # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that
        # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them.
        if not self._match_text_seq("WITHIN", "GROUP"):
            self._retreat(index)
            return self.validate_expression(exp.GroupConcat.from_arg_list(args), args)

        self._match_l_paren()  # The corresponding match_r_paren will be called in parse_function (caller)
        order = self._parse_order(this=seq_get(args, 0))
        return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1))

    def _parse_convert(self, strict: bool) -> t.Optional[exp.Expression]:
        # CONVERT(expr USING charset) or CONVERT(expr, type); builds a (Try)Cast.
        this = self._parse_bitwise()

        if self._match(TokenType.USING):
            to: t.Optional[exp.Expression] = self.expression(
                exp.CharacterSet, this=self._parse_var()
            )
        elif self._match(TokenType.COMMA):
            to = self._parse_types()
        else:
            to = None

        return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to)

    def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]:
        """
        There are generally two variants of the DECODE function:

        - DECODE(bin, charset)
        - DECODE(expression, search, result [, search, result] ... [, default])

        The second variant will always be parsed into a CASE expression. Note that NULL
        needs special treatment, since we need to explicitly check for it with `IS NULL`,
        instead of relying on pattern matching.
        """
        args = self._parse_csv(self._parse_conjunction)

        if len(args) < 3:
            return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1))

        expression, *expressions = args
        if not expression:
            return None

        ifs = []
        for search, result in zip(expressions[::2], expressions[1::2]):
            if not search or not result:
                return None

            if isinstance(search, exp.Literal):
                ifs.append(
                    exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result)
                )
            elif isinstance(search, exp.Null):
                ifs.append(
                    exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result)
                )
            else:
                # Non-literal search: also match when both sides are NULL, since DECODE
                # treats NULL = NULL as a match.
                cond = exp.or_(
                    exp.EQ(this=expression.copy(), expression=search),
                    exp.and_(
                        exp.Is(this=expression.copy(), expression=exp.Null()),
                        exp.Is(this=search.copy(), expression=exp.Null()),
                        copy=False,
                    ),
                    copy=False,
                )
                ifs.append(exp.If(this=cond, true=result))

        # An odd number of (search, result) args leaves a trailing default.
        return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None)

    def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]:
        # [KEY] key [:|VALUE] value, as in JSON_OBJECT arguments.
        self._match_text_seq("KEY")
        key = self._parse_field()
        self._match(TokenType.COLON)
        self._match_text_seq("VALUE")
        value = self._parse_field()

        if not key and not value:
            return None
        return self.expression(exp.JSONKeyValue, this=key, expression=value)

    def _parse_json_object(self) -> exp.JSONObject:
        """Parse JSON_OBJECT arguments: key/value pairs plus NULL/UNIQUE/RETURNING/FORMAT options."""
        star = self._parse_star()
        expressions = [star] if star else self._parse_csv(self._parse_json_key_value)

        null_handling = None
        if self._match_text_seq("NULL", "ON", "NULL"):
            null_handling = "NULL ON NULL"
        elif self._match_text_seq("ABSENT", "ON", "NULL"):
            null_handling = "ABSENT ON NULL"

        unique_keys = None
        if self._match_text_seq("WITH", "UNIQUE"):
            unique_keys = True
        elif self._match_text_seq("WITHOUT", "UNIQUE"):
            unique_keys = False

        self._match_text_seq("KEYS")

        return_type = self._match_text_seq("RETURNING") and self._parse_type()
        format_json = self._match_text_seq("FORMAT", "JSON")
        encoding = self._match_text_seq("ENCODING") and self._parse_var()

        return self.expression(
            exp.JSONObject,
            expressions=expressions,
            null_handling=null_handling,
            unique_keys=unique_keys,
            return_type=return_type,
            format_json=format_json,
            encoding=encoding,
        )
self._match_text_seq("WITHOUT", "UNIQUE"): 3850 unique_keys = False 3851 3852 self._match_text_seq("KEYS") 3853 3854 return_type = self._match_text_seq("RETURNING") and self._parse_type() 3855 format_json = self._match_text_seq("FORMAT", "JSON") 3856 encoding = self._match_text_seq("ENCODING") and self._parse_var() 3857 3858 return self.expression( 3859 exp.JSONObject, 3860 expressions=expressions, 3861 null_handling=null_handling, 3862 unique_keys=unique_keys, 3863 return_type=return_type, 3864 format_json=format_json, 3865 encoding=encoding, 3866 ) 3867 3868 def _parse_logarithm(self) -> exp.Func: 3869 # Default argument order is base, expression 3870 args = self._parse_csv(self._parse_range) 3871 3872 if len(args) > 1: 3873 if not self.LOG_BASE_FIRST: 3874 args.reverse() 3875 return exp.Log.from_arg_list(args) 3876 3877 return self.expression( 3878 exp.Ln if self.LOG_DEFAULTS_TO_LN else exp.Log, this=seq_get(args, 0) 3879 ) 3880 3881 def _parse_match_against(self) -> exp.MatchAgainst: 3882 expressions = self._parse_csv(self._parse_column) 3883 3884 self._match_text_seq(")", "AGAINST", "(") 3885 3886 this = self._parse_string() 3887 3888 if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"): 3889 modifier = "IN NATURAL LANGUAGE MODE" 3890 if self._match_text_seq("WITH", "QUERY", "EXPANSION"): 3891 modifier = f"{modifier} WITH QUERY EXPANSION" 3892 elif self._match_text_seq("IN", "BOOLEAN", "MODE"): 3893 modifier = "IN BOOLEAN MODE" 3894 elif self._match_text_seq("WITH", "QUERY", "EXPANSION"): 3895 modifier = "WITH QUERY EXPANSION" 3896 else: 3897 modifier = None 3898 3899 return self.expression( 3900 exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier 3901 ) 3902 3903 # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16 3904 def _parse_open_json(self) -> exp.OpenJSON: 3905 this = self._parse_bitwise() 3906 path = self._match(TokenType.COMMA) and self._parse_string() 3907 3908 def 
_parse_open_json_column_def() -> exp.OpenJSONColumnDef: 3909 this = self._parse_field(any_token=True) 3910 kind = self._parse_types() 3911 path = self._parse_string() 3912 as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON) 3913 3914 return self.expression( 3915 exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json 3916 ) 3917 3918 expressions = None 3919 if self._match_pair(TokenType.R_PAREN, TokenType.WITH): 3920 self._match_l_paren() 3921 expressions = self._parse_csv(_parse_open_json_column_def) 3922 3923 return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions) 3924 3925 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 3926 args = self._parse_csv(self._parse_bitwise) 3927 3928 if self._match(TokenType.IN): 3929 return self.expression( 3930 exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0) 3931 ) 3932 3933 if haystack_first: 3934 haystack = seq_get(args, 0) 3935 needle = seq_get(args, 1) 3936 else: 3937 needle = seq_get(args, 0) 3938 haystack = seq_get(args, 1) 3939 3940 return self.expression( 3941 exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2) 3942 ) 3943 3944 def _parse_join_hint(self, func_name: str) -> exp.JoinHint: 3945 args = self._parse_csv(self._parse_table) 3946 return exp.JoinHint(this=func_name.upper(), expressions=args) 3947 3948 def _parse_substring(self) -> exp.Substring: 3949 # Postgres supports the form: substring(string [from int] [for int]) 3950 # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6 3951 3952 args = self._parse_csv(self._parse_bitwise) 3953 3954 if self._match(TokenType.FROM): 3955 args.append(self._parse_bitwise()) 3956 if self._match(TokenType.FOR): 3957 args.append(self._parse_bitwise()) 3958 3959 return self.validate_expression(exp.Substring.from_arg_list(args), args) 3960 3961 def _parse_trim(self) -> exp.Trim: 3962 # https://www.w3resource.com/sql/character-functions/trim.php 3963 
# https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html 3964 3965 position = None 3966 collation = None 3967 3968 if self._match_texts(self.TRIM_TYPES): 3969 position = self._prev.text.upper() 3970 3971 expression = self._parse_bitwise() 3972 if self._match_set((TokenType.FROM, TokenType.COMMA)): 3973 this = self._parse_bitwise() 3974 else: 3975 this = expression 3976 expression = None 3977 3978 if self._match(TokenType.COLLATE): 3979 collation = self._parse_bitwise() 3980 3981 return self.expression( 3982 exp.Trim, this=this, position=position, expression=expression, collation=collation 3983 ) 3984 3985 def _parse_window_clause(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]: 3986 return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window) 3987 3988 def _parse_named_window(self) -> t.Optional[exp.Expression]: 3989 return self._parse_window(self._parse_id_var(), alias=True) 3990 3991 def _parse_respect_or_ignore_nulls( 3992 self, this: t.Optional[exp.Expression] 3993 ) -> t.Optional[exp.Expression]: 3994 if self._match_text_seq("IGNORE", "NULLS"): 3995 return self.expression(exp.IgnoreNulls, this=this) 3996 if self._match_text_seq("RESPECT", "NULLS"): 3997 return self.expression(exp.RespectNulls, this=this) 3998 return this 3999 4000 def _parse_window( 4001 self, this: t.Optional[exp.Expression], alias: bool = False 4002 ) -> t.Optional[exp.Expression]: 4003 if self._match_pair(TokenType.FILTER, TokenType.L_PAREN): 4004 this = self.expression(exp.Filter, this=this, expression=self._parse_where()) 4005 self._match_r_paren() 4006 4007 # T-SQL allows the OVER (...) syntax after WITHIN GROUP. 
4008 # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16 4009 if self._match_text_seq("WITHIN", "GROUP"): 4010 order = self._parse_wrapped(self._parse_order) 4011 this = self.expression(exp.WithinGroup, this=this, expression=order) 4012 4013 # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER 4014 # Some dialects choose to implement and some do not. 4015 # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html 4016 4017 # There is some code above in _parse_lambda that handles 4018 # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ... 4019 4020 # The below changes handle 4021 # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ... 4022 4023 # Oracle allows both formats 4024 # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html) 4025 # and Snowflake chose to do the same for familiarity 4026 # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes 4027 this = self._parse_respect_or_ignore_nulls(this) 4028 4029 # bigquery select from window x AS (partition by ...) 
        if alias:
            # Named-window definition: no OVER keyword, optional AS.
            over = None
            self._match(TokenType.ALIAS)
        elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS):
            # No OVER (or dialect equivalent) follows: not a window expression.
            return this
        else:
            over = self._prev.text.upper()

        if not self._match(TokenType.L_PAREN):
            # OVER window_name — a reference to a named window.
            return self.expression(
                exp.Window, this=this, alias=self._parse_id_var(False), over=over
            )

        window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS)

        first = self._match(TokenType.FIRST)
        if self._match_text_seq("LAST"):
            first = False

        partition = self._parse_partition_by()
        order = self._parse_order()
        kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text

        if kind:
            # Frame spec: ROWS|RANGE [BETWEEN] <bound> [AND <bound>].
            self._match(TokenType.BETWEEN)
            start = self._parse_window_spec()
            self._match(TokenType.AND)
            end = self._parse_window_spec()

            spec = self.expression(
                exp.WindowSpec,
                kind=kind,
                start=start["value"],
                start_side=start["side"],
                end=end["value"],
                end_side=end["side"],
            )
        else:
            spec = None

        self._match_r_paren()

        return self.expression(
            exp.Window,
            this=this,
            partition_by=partition,
            order=order,
            spec=spec,
            alias=window_alias,
            over=over,
            first=first,
        )

    def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]:
        """Parse one frame bound: UNBOUNDED / CURRENT ROW / expr, plus side."""
        self._match(TokenType.BETWEEN)

        return {
            "value": (
                (self._match_text_seq("UNBOUNDED") and "UNBOUNDED")
                or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW")
                or self._parse_bitwise()
            ),
            "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text,
        }

    def _parse_alias(
        self, this: t.Optional[exp.Expression], explicit: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse `[AS] alias` or `(a, b, ...)` after an expression.

        With `explicit=True`, only accept an alias introduced by AS.
        """
        any_token = self._match(TokenType.ALIAS)

        if explicit and not any_token:
            return this

        if self._match(TokenType.L_PAREN):
            # Multi-column alias list, e.g. `t AS (a, b)`.
            aliases = self.expression(
                exp.Aliases,
                this=this,
                expressions=self._parse_csv(lambda: self._parse_id_var(any_token)),
            )
            self._match_r_paren(aliases)
            return aliases

        alias = self._parse_id_var(any_token)

        if alias:
            return self.expression(exp.Alias, this=this, alias=alias)

        return this

    def _parse_id_var(
        self,
        any_token: bool = True,
        tokens: t.Optional[t.Collection[TokenType]] = None,
    ) -> t.Optional[exp.Expression]:
        """Parse an identifier or any token usable as one (keywords included)."""
        identifier = self._parse_identifier()

        if identifier:
            return identifier

        if (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS):
            quoted = self._prev.token_type == TokenType.STRING
            return exp.Identifier(this=self._prev.text, quoted=quoted)

        return None

    def _parse_string(self) -> t.Optional[exp.Expression]:
        """Parse a string literal, falling back to a placeholder."""
        if self._match(TokenType.STRING):
            return self.PRIMARY_PARSERS[TokenType.STRING](self, self._prev)
        return self._parse_placeholder()

    def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]:
        """Parse a string literal and convert it to a quoted identifier."""
        return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True)

    def _parse_number(self) -> t.Optional[exp.Expression]:
        """Parse a numeric literal, falling back to a placeholder."""
        if self._match(TokenType.NUMBER):
            return self.PRIMARY_PARSERS[TokenType.NUMBER](self, self._prev)
        return self._parse_placeholder()

    def _parse_identifier(self) -> t.Optional[exp.Expression]:
        """Parse a quoted identifier token, falling back to a placeholder."""
        if self._match(TokenType.IDENTIFIER):
            return self.expression(exp.Identifier, this=self._prev.text, quoted=True)
        return self._parse_placeholder()

    def _parse_var(
        self, any_token: bool = False, tokens: t.Optional[t.Collection[TokenType]] = None
    ) -> t.Optional[exp.Expression]:
        """Parse a VAR token (or any of `tokens`) into exp.Var."""
        if (
            (any_token and self._advance_any())
            or self._match(TokenType.VAR)
            or (self._match_set(tokens) if tokens else False)
        ):
            return self.expression(exp.Var, this=self._prev.text)
        return self._parse_placeholder()

    def _advance_any(self) -> t.Optional[Token]:
        """Consume and return the current token unless it is reserved."""
        if self._curr and self._curr.token_type not in self.RESERVED_KEYWORDS:
            self._advance()
            return self._prev
        return None

    def _parse_var_or_string(self) -> t.Optional[exp.Expression]:
        """Parse either a var or a string literal."""
        return self._parse_var() or self._parse_string()

    def _parse_null(self) -> t.Optional[exp.Expression]:
        """Parse a NULL literal, if present."""
        if self._match(TokenType.NULL):
            return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev)
        return None

    def _parse_boolean(self) -> t.Optional[exp.Expression]:
        """Parse TRUE or FALSE, if present."""
        if self._match(TokenType.TRUE):
            return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev)
        if self._match(TokenType.FALSE):
            return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev)
        return None

    def _parse_star(self) -> t.Optional[exp.Expression]:
        """Parse a `*` token, if present."""
        if self._match(TokenType.STAR):
            return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev)
        return None

    def _parse_parameter(self) -> exp.Parameter:
        """Parse a parameter reference, optionally brace-wrapped (e.g. @{x})."""
        wrapped = self._match(TokenType.L_BRACE)
        this = self._parse_var() or self._parse_identifier() or self._parse_primary()
        self._match(TokenType.R_BRACE)
        return self.expression(exp.Parameter, this=this, wrapped=wrapped)

    def _parse_placeholder(self) -> t.Optional[exp.Expression]:
        """Parse a placeholder token (?, :name, ...); backtrack on failure."""
        if self._match_set(self.PLACEHOLDER_PARSERS):
            placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self)
            if placeholder:
                return placeholder
            # The matched token did not yield a placeholder — undo the match.
            self._advance(-1)
        return None

    def _parse_except(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]:
        """Parse `EXCEPT (cols)` / `EXCEPT cols` column-exclusion lists."""
        if not self._match(TokenType.EXCEPT):
            return None
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_column)
        return self._parse_csv(self._parse_column)

    def _parse_replace(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]:
        """Parse `REPLACE (exprs)` / `REPLACE exprs` column-replacement lists."""
        if not self._match(TokenType.REPLACE):
            return None
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_expression)
        return self._parse_csv(self._parse_expression)

    def _parse_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA
    ) -> t.List[t.Optional[exp.Expression]]:
        """Parse a `sep`-separated list using `parse_method`; drops Nones."""
        parse_result = parse_method()
        items = [parse_result] if parse_result is not None else []

        while self._match(sep):
            # Comments attached to the separator belong to the previous item.
            self._add_comments(parse_result)
            parse_result = parse_method()
            if parse_result is not None:
                items.append(parse_result)

        return items

    def _parse_tokens(
        self, parse_method: t.Callable, expressions: t.Dict
    ) -> t.Optional[exp.Expression]:
        """Parse a left-associative chain of binary operators from `expressions`."""
        this = parse_method()

        while self._match_set(expressions):
            this = self.expression(
                expressions[self._prev.token_type],
                this=this,
                comments=self._prev_comments,
                expression=parse_method(),
            )

        return this

    def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[t.Optional[exp.Expression]]:
        """Parse a parenthesized, comma-separated identifier list."""
        return self._parse_wrapped_csv(self._parse_id_var, optional=optional)

    def _parse_wrapped_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False
    ) -> t.List[t.Optional[exp.Expression]]:
        """Parse a parenthesized `sep`-separated list; parens optional if asked."""
        return self._parse_wrapped(
            lambda: self._parse_csv(parse_method, sep=sep), optional=optional
        )

    def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any:
        """Run `parse_method` inside parentheses; error if required and absent."""
        wrapped = self._match(TokenType.L_PAREN)
        if not wrapped and not optional:
            self.raise_error("Expecting (")
        parse_result = parse_method()
        if wrapped:
            self._match_r_paren()
        return parse_result

    def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]:
        """Parse a SELECT statement or a plain (possibly aliased) expression."""
        return self._parse_select() or self._parse_set_operations(
            self._parse_expression() if alias else self._parse_conjunction()
        )

    def _parse_ddl_select(self) -> t.Optional[exp.Expression]:
        """Parse the SELECT used in DDL (e.g. CREATE TABLE AS ...)."""
        return self._parse_query_modifiers(
            self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False))
        )

    def _parse_transaction(self) -> exp.Transaction:
        """Parse BEGIN/START [kind] [TRANSACTION|WORK] [modes...]."""
        this = None
        if self._match_texts(self.TRANSACTION_KIND):
            this = self._prev.text

        self._match_texts({"TRANSACTION", "WORK"})

        modes = []
        while True:
            mode = []
            # Each mode may consist of several VAR tokens, joined by spaces.
            while self._match(TokenType.VAR):
                mode.append(self._prev.text)

            if mode:
                modes.append(" ".join(mode))
            if not self._match(TokenType.COMMA):
                break

        return self.expression(exp.Transaction, this=this, modes=modes)

    def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback:
        """Parse COMMIT/ROLLBACK with optional savepoint and AND [NO] CHAIN."""
        chain = None
        savepoint = None
        # The COMMIT/ROLLBACK keyword itself was consumed by the caller.
        is_rollback = self._prev.token_type == TokenType.ROLLBACK

        self._match_texts({"TRANSACTION", "WORK"})

        if self._match_text_seq("TO"):
            self._match_text_seq("SAVEPOINT")
            savepoint = self._parse_id_var()

        if self._match(TokenType.AND):
            chain = not self._match_text_seq("NO")
            self._match_text_seq("CHAIN")

        if is_rollback:
            return self.expression(exp.Rollback, savepoint=savepoint)

        return self.expression(exp.Commit, chain=chain)

    def _parse_add_column(self) -> t.Optional[exp.Expression]:
        """Parse `ADD [COLUMN] [IF NOT EXISTS] <column def> [FIRST|AFTER col]`."""
        if not self._match_text_seq("ADD"):
            return None

        self._match(TokenType.COLUMN)
        exists_column = self._parse_exists(not_=True)
        expression = self._parse_column_def(self._parse_field(any_token=True))

        if expression:
            expression.set("exists", exists_column)

        # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns
        if self._match_texts(("FIRST", "AFTER")):
            position = self._prev.text
            column_position = self.expression(
                exp.ColumnPosition, this=self._parse_column(), position=position
            )
            expression.set("position", column_position)

        return expression

    def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]:
        """Parse a DROP action in ALTER TABLE, defaulting its kind to COLUMN."""
        drop = self._match(TokenType.DROP) and self._parse_drop()
        if drop and not isinstance(drop, exp.Command):
            drop.set("kind", drop.args.get("kind", "COLUMN"))
        return drop

    # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html
    def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition:
        """Parse `[IF EXISTS] PARTITION (...), ...` in ALTER TABLE DROP."""
        return self.expression(
            exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists
        )

    def _parse_add_constraint(self) -> exp.AddConstraint:
        """Parse ADD CONSTRAINT / FOREIGN KEY / PRIMARY KEY actions."""
        this = None
        # The introducing keyword was consumed by _parse_alter_table_add.
        kind = self._prev.token_type

        if kind == TokenType.CONSTRAINT:
            this = self._parse_id_var()

            if self._match_text_seq("CHECK"):
                expression = self._parse_wrapped(self._parse_conjunction)
                enforced = self._match_text_seq("ENFORCED")

                return self.expression(
                    exp.AddConstraint, this=this, expression=expression, enforced=enforced
                )

        if kind == TokenType.FOREIGN_KEY or self._match(TokenType.FOREIGN_KEY):
            expression = self._parse_foreign_key()
        elif kind == TokenType.PRIMARY_KEY or self._match(TokenType.PRIMARY_KEY):
            expression = self._parse_primary_key()
        else:
            expression = None

        return self.expression(exp.AddConstraint, this=this, expression=expression)

    def _parse_alter_table_add(self) -> t.List[t.Optional[exp.Expression]]:
        """Parse the ADD action list of ALTER TABLE (constraints or columns)."""
        index = self._index - 1

        if self._match_set(self.ADD_CONSTRAINT_TOKENS):
            return self._parse_csv(self._parse_add_constraint)

        # Not a constraint: rewind and parse as column additions.
        self._retreat(index)
        return self._parse_csv(self._parse_add_column)

    def _parse_alter_table_alter(self) -> exp.AlterColumn:
        self._match(TokenType.COLUMN)
        column = self._parse_field(any_token=True)

        if self._match_pair(TokenType.DROP, TokenType.DEFAULT):
            return self.expression(exp.AlterColumn, this=column, drop=True)
        if self._match_pair(TokenType.SET, TokenType.DEFAULT):
            return self.expression(exp.AlterColumn, this=column, default=self._parse_conjunction())

        # Remaining form: ALTER COLUMN ... [SET DATA] TYPE ... [COLLATE ...] [USING ...]
        self._match_text_seq("SET", "DATA")
        return self.expression(
            exp.AlterColumn,
            this=column,
            dtype=self._match_text_seq("TYPE") and self._parse_types(),
            collate=self._match(TokenType.COLLATE) and self._parse_term(),
            using=self._match(TokenType.USING) and self._parse_conjunction(),
        )

    def _parse_alter_table_drop(self) -> t.List[t.Optional[exp.Expression]]:
        """Parse the DROP action list of ALTER TABLE (partitions or columns)."""
        index = self._index - 1

        partition_exists = self._parse_exists()
        if self._match(TokenType.PARTITION, advance=False):
            return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists))

        # Not a partition drop: rewind and parse as column drops.
        self._retreat(index)
        return self._parse_csv(self._parse_drop_column)

    def _parse_alter_table_rename(self) -> exp.RenameTable:
        """Parse `RENAME TO <table>` inside ALTER TABLE."""
        self._match_text_seq("TO")
        return self.expression(exp.RenameTable, this=self._parse_table(schema=True))

    def _parse_alter(self) -> exp.AlterTable | exp.Command:
        """Parse ALTER TABLE; unknown forms fall back to a raw exp.Command."""
        start = self._prev

        if not self._match(TokenType.TABLE):
            return self._parse_as_command(start)

        exists = self._parse_exists()
        this = self._parse_table(schema=True)

        if self._next:
            self._advance()

        parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None
        if parser:
            actions = ensure_list(parser(self))

            # Only accept the parse if every token was consumed.
            if not self._curr:
                return self.expression(
                    exp.AlterTable,
                    this=this,
                    exists=exists,
                    actions=actions,
                )
        return self._parse_as_command(start)

    def _parse_merge(self) -> exp.Merge:
        """Parse MERGE INTO ... USING ... ON ... with WHEN [NOT] MATCHED arms."""
        self._match(TokenType.INTO)
        target = self._parse_table()

        self._match(TokenType.USING)
        using = self._parse_table()

        self._match(TokenType.ON)
        on = self._parse_conjunction()

        whens = []
        while self._match(TokenType.WHEN):
            matched = not self._match(TokenType.NOT)
            self._match_text_seq("MATCHED")
            # source: False for BY TARGET, True for BY SOURCE, else False.
            source = (
                False
                if self._match_text_seq("BY", "TARGET")
                else self._match_text_seq("BY", "SOURCE")
            )
            condition = self._parse_conjunction() if self._match(TokenType.AND) else None

            self._match(TokenType.THEN)

            if self._match(TokenType.INSERT):
                _this = self._parse_star()
                if _this:
                    # INSERT * shorthand.
                    then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=_this)
                else:
                    then = self.expression(
                        exp.Insert,
                        this=self._parse_value(),
                        expression=self._match(TokenType.VALUES) and self._parse_value(),
                    )
            elif self._match(TokenType.UPDATE):
                expressions = self._parse_star()
                if expressions:
                    # UPDATE * shorthand.
                    then = self.expression(exp.Update, expressions=expressions)
                else:
                    then = self.expression(
                        exp.Update,
                        expressions=self._match(TokenType.SET)
                        and self._parse_csv(self._parse_equality),
                    )
            elif self._match(TokenType.DELETE):
                then = self.expression(exp.Var, this=self._prev.text)
            else:
                then = None

            whens.append(
                self.expression(
                    exp.When,
                    matched=matched,
                    source=source,
                    condition=condition,
                    then=then,
                )
            )

        return self.expression(
            exp.Merge,
            this=target,
            using=using,
            on=on,
            expressions=whens,
        )

    def _parse_show(self) -> t.Optional[exp.Expression]:
        """Parse SHOW via registered sub-parsers, else a generic exp.Show."""
        parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE)
        if parser:
            return parser(self)
        self._advance()
        return self.expression(exp.Show, this=self._prev.text.upper())

    def _parse_set_item_assignment(
        self, kind: t.Optional[str] = None
    ) -> t.Optional[exp.Expression]:
        """Parse one `name = value` / `name TO value` item of a SET statement."""
        index = self._index

        if kind in {"GLOBAL", "SESSION"} and self._match_text_seq("TRANSACTION"):
            return self._parse_set_transaction(global_=kind == "GLOBAL")

        left = self._parse_primary() or self._parse_id_var()

        if not self._match_texts(("=", "TO")):
            # Not an assignment: rewind so the caller can try other forms.
            self._retreat(index)
            return None

        right = self._parse_statement() or self._parse_id_var()
        this = self.expression(exp.EQ, this=left, expression=right)

        return self.expression(exp.SetItem, this=this, kind=kind)

    def _parse_set_transaction(self, global_: bool = False) -> exp.Expression:
        """Parse SET [GLOBAL|SESSION] TRANSACTION <characteristics...>."""
        self._match_text_seq("TRANSACTION")
        characteristics = self._parse_csv(
            lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS)
        )
        return self.expression(
            exp.SetItem,
            expressions=characteristics,
            kind="TRANSACTION",
            **{"global": global_},  # type: ignore
        )

    def _parse_set_item(self) -> t.Optional[exp.Expression]:
        """Parse one SET item, dispatching to registered SET sub-parsers."""
        parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE)
        return parser(self) if parser else self._parse_set_item_assignment(kind=None)

    def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command:
        """Parse SET; falls back to a raw command if tokens remain unconsumed."""
        index = self._index
        set_ = self.expression(
            exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag
        )

        if self._curr:
            self._retreat(index)
            return self._parse_as_command(self._prev)

        return set_

    def _parse_var_from_options(self, options: t.Collection[str]) -> t.Optional[exp.Var]:
        """Match the first multi-word option from `options` and return it as a Var."""
        for option in options:
            if self._match_text_seq(*option.split(" ")):
                return exp.var(option)
        return None

    def _parse_as_command(self, start: Token) -> exp.Command:
        """Consume all remaining tokens and wrap the raw SQL in exp.Command."""
        while self._curr:
            self._advance()
        text = self._find_sql(start, self._prev)
        # Split the leading keyword from the rest of the raw statement.
        size = len(start.text)
        return exp.Command(this=text[:size], expression=text[size:])

    def _parse_dict_property(self, this: str) -> exp.DictProperty:
        """Parse a dictionary property `this (kind (key value, ...))` (ClickHouse)."""
        settings = []

        self._match_l_paren()
        kind = self._parse_id_var()

        if self._match(TokenType.L_PAREN):
            while True:
                key = self._parse_id_var()
                value = self._parse_primary()

                if not key and value is None:
                    break
                settings.append(self.expression(exp.DictSubProperty, this=key, value=value))
            self._match(TokenType.R_PAREN)

        self._match_r_paren()

        return self.expression(
            exp.DictProperty,
            this=this,
            kind=kind.this if kind else None,
            settings=settings,
        )

    def _parse_dict_range(self, this: str) -> exp.DictRange:
        """Parse `this (MIN x MAX y)` or `this (y)`; MIN defaults to 0."""
        self._match_l_paren()
        has_min = self._match_text_seq("MIN")
        if has_min:
            # NOTE: `min`/`max` shadow the builtins, but only locally.
            min = self._parse_var() or self._parse_primary()
            self._match_text_seq("MAX")
            max = self._parse_var() or self._parse_primary()
        else:
            max = self._parse_var() or self._parse_primary()
            min = exp.Literal.number(0)
        self._match_r_paren()
        return self.expression(exp.DictRange, this=this, min=min, max=max)

    def _find_parser(
        self, parsers: t.Dict[str, t.Callable], trie: t.Dict
    ) -> t.Optional[t.Callable]:
        """Find the longest keyword-sequence match in `trie` and return its parser.

        Restores the token position if no complete match is found.
        """
        if not self._curr:
            return None

        index = self._index
        this = []
        while True:
            # The current token might be multiple words
            curr = self._curr.text.upper()
            key = curr.split(" ")
            this.append(curr)

            self._advance()
            result, trie = in_trie(trie, key)
            if result == TrieResult.FAILED:
                break

            if result == TrieResult.EXISTS:
                subparser = parsers[" ".join(this)]
                return subparser

        self._retreat(index)
        return None

    def _match(self, token_type, advance=True, expression=None):
        # Return True (consuming the token unless advance=False) if the
        # current token has the given type; otherwise None.
        if not self._curr:
            return None

        if self._curr.token_type == token_type:
            if advance:
                self._advance()
            self._add_comments(expression)
            return True

        return None

    def _match_set(self, types, advance=True):
        # Like _match, but accepts any token type in `types`.
        if not self._curr:
            return None

        if self._curr.token_type in types:
            if advance:
                self._advance()
            return True

        return None

    def _match_pair(self, token_type_a, token_type_b, advance=True):
        # Match two consecutive token types; consumes both on success.
        if not self._curr or not self._next:
            return None

        if self._curr.token_type == token_type_a and self._next.token_type == token_type_b:
            if advance:
                self._advance(2)
            return True

        return None

    def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None:
        # Require an opening parenthesis; raise a parse error otherwise.
        if not self._match(TokenType.L_PAREN, expression=expression):
            self.raise_error("Expecting (")

    def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None:
        # Require a closing parenthesis; raise a parse error otherwise.
        if not self._match(TokenType.R_PAREN, expression=expression):
            self.raise_error("Expecting )")

    def _match_texts(self, texts, advance=True):
        # Case-insensitively match the current token's text against `texts`.
        if self._curr and self._curr.text.upper() in texts:
            if advance:
                self._advance()
            return True
        return False

    def _match_text_seq(self, *texts, advance=True):
        # Case-insensitively match a sequence of token texts; backtracks fully
        # on any mismatch (and also when advance=False, after confirming).
        index = self._index
        for text in texts:
            if self._curr and self._curr.text.upper() == text:
                self._advance()
            else:
                self._retreat(index)
                return False

        if not advance:
            self._retreat(index)

        return True

    @t.overload
    def _replace_columns_with_dots(self, this: exp.Expression) -> exp.Expression:
        ...

    @t.overload
    def _replace_columns_with_dots(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        ...
    def _replace_columns_with_dots(self, this):
        """Recursively rewrite Column/Identifier nodes into Dot/Var chains."""
        if isinstance(this, exp.Dot):
            exp.replace_children(this, self._replace_columns_with_dots)
        elif isinstance(this, exp.Column):
            exp.replace_children(this, self._replace_columns_with_dots)
            table = this.args.get("table")
            # A qualified column becomes table.column (Dot); an unqualified
            # one becomes a plain Var.
            this = (
                self.expression(exp.Dot, this=table, expression=this.this)
                if table
                else self.expression(exp.Var, this=this.name)
            )
        elif isinstance(this, exp.Identifier):
            this = self.expression(exp.Var, this=this.name)

        return this

    def _replace_lambda(
        self, node: t.Optional[exp.Expression], lambda_variables: t.Set[str]
    ) -> t.Optional[exp.Expression]:
        """Rewrite columns in a lambda body that refer to lambda parameters.

        Columns whose first part names a lambda variable are replaced by the
        bare identifier (or a Dot chain if the column was qualified), so they
        are not treated as table columns.
        """
        if not node:
            return node

        for column in node.find_all(exp.Column):
            if column.parts[0].name in lambda_variables:
                dot_or_id = column.to_dot() if column.table else column.this
                parent = column.parent

                # Replace the outermost enclosing Dot chain, if any.
                while isinstance(parent, exp.Dot):
                    if not isinstance(parent.parent, exp.Dot):
                        parent.replace(dot_or_id)
                        break
                    parent = parent.parent
                else:
                    # No enclosing Dot: replace the column node itself.
                    if column is node:
                        node = dot_or_id
                    else:
                        column.replace(dot_or_id)
        return node
def parse_var_map(args: t.List) -> exp.StarMap | exp.VarMap:
    """Build a VAR_MAP expression from alternating key/value arguments.

    A single `*` argument yields a StarMap; otherwise the arguments are
    consumed pairwise (key, value, key, value, ...) into a VarMap whose
    keys and values are parallel arrays. An odd number of arguments raises
    IndexError when the final key has no matching value.
    """
    if len(args) == 1 and args[0].is_star:
        return exp.StarMap(this=args[0])

    map_keys: t.List = []
    map_values: t.List = []

    index = 0
    while index < len(args):
        map_keys.append(args[index])
        map_values.append(args[index + 1])
        index += 2

    return exp.VarMap(
        keys=exp.Array(expressions=map_keys),
        values=exp.Array(expressions=map_values),
    )
60class Parser(metaclass=_Parser): 61 """ 62 Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree. 63 64 Args: 65 error_level: The desired error level. 66 Default: ErrorLevel.IMMEDIATE 67 error_message_context: Determines the amount of context to capture from a 68 query string when displaying the error message (in number of characters). 69 Default: 100 70 max_errors: Maximum number of error messages to include in a raised ParseError. 71 This is only relevant if error_level is ErrorLevel.RAISE. 72 Default: 3 73 """ 74 75 FUNCTIONS: t.Dict[str, t.Callable] = { 76 **{name: f.from_arg_list for f in exp.ALL_FUNCTIONS for name in f.sql_names()}, 77 "DATE_TO_DATE_STR": lambda args: exp.Cast( 78 this=seq_get(args, 0), 79 to=exp.DataType(this=exp.DataType.Type.TEXT), 80 ), 81 "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)), 82 "LIKE": parse_like, 83 "TIME_TO_TIME_STR": lambda args: exp.Cast( 84 this=seq_get(args, 0), 85 to=exp.DataType(this=exp.DataType.Type.TEXT), 86 ), 87 "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring( 88 this=exp.Cast( 89 this=seq_get(args, 0), 90 to=exp.DataType(this=exp.DataType.Type.TEXT), 91 ), 92 start=exp.Literal.number(1), 93 length=exp.Literal.number(10), 94 ), 95 "VAR_MAP": parse_var_map, 96 } 97 98 NO_PAREN_FUNCTIONS = { 99 TokenType.CURRENT_DATE: exp.CurrentDate, 100 TokenType.CURRENT_DATETIME: exp.CurrentDate, 101 TokenType.CURRENT_TIME: exp.CurrentTime, 102 TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp, 103 TokenType.CURRENT_USER: exp.CurrentUser, 104 } 105 106 NESTED_TYPE_TOKENS = { 107 TokenType.ARRAY, 108 TokenType.MAP, 109 TokenType.NULLABLE, 110 TokenType.STRUCT, 111 } 112 113 ENUM_TYPE_TOKENS = { 114 TokenType.ENUM, 115 } 116 117 TYPE_TOKENS = { 118 TokenType.BIT, 119 TokenType.BOOLEAN, 120 TokenType.TINYINT, 121 TokenType.UTINYINT, 122 TokenType.SMALLINT, 123 TokenType.USMALLINT, 124 TokenType.INT, 125 TokenType.UINT, 126 TokenType.BIGINT, 127 
TokenType.UBIGINT, 128 TokenType.INT128, 129 TokenType.UINT128, 130 TokenType.INT256, 131 TokenType.UINT256, 132 TokenType.FLOAT, 133 TokenType.DOUBLE, 134 TokenType.CHAR, 135 TokenType.NCHAR, 136 TokenType.VARCHAR, 137 TokenType.NVARCHAR, 138 TokenType.TEXT, 139 TokenType.MEDIUMTEXT, 140 TokenType.LONGTEXT, 141 TokenType.MEDIUMBLOB, 142 TokenType.LONGBLOB, 143 TokenType.BINARY, 144 TokenType.VARBINARY, 145 TokenType.JSON, 146 TokenType.JSONB, 147 TokenType.INTERVAL, 148 TokenType.TIME, 149 TokenType.TIMESTAMP, 150 TokenType.TIMESTAMPTZ, 151 TokenType.TIMESTAMPLTZ, 152 TokenType.DATETIME, 153 TokenType.DATETIME64, 154 TokenType.DATE, 155 TokenType.INT4RANGE, 156 TokenType.INT4MULTIRANGE, 157 TokenType.INT8RANGE, 158 TokenType.INT8MULTIRANGE, 159 TokenType.NUMRANGE, 160 TokenType.NUMMULTIRANGE, 161 TokenType.TSRANGE, 162 TokenType.TSMULTIRANGE, 163 TokenType.TSTZRANGE, 164 TokenType.TSTZMULTIRANGE, 165 TokenType.DATERANGE, 166 TokenType.DATEMULTIRANGE, 167 TokenType.DECIMAL, 168 TokenType.BIGDECIMAL, 169 TokenType.UUID, 170 TokenType.GEOGRAPHY, 171 TokenType.GEOMETRY, 172 TokenType.HLLSKETCH, 173 TokenType.HSTORE, 174 TokenType.PSEUDO_TYPE, 175 TokenType.SUPER, 176 TokenType.SERIAL, 177 TokenType.SMALLSERIAL, 178 TokenType.BIGSERIAL, 179 TokenType.XML, 180 TokenType.UNIQUEIDENTIFIER, 181 TokenType.USERDEFINED, 182 TokenType.MONEY, 183 TokenType.SMALLMONEY, 184 TokenType.ROWVERSION, 185 TokenType.IMAGE, 186 TokenType.VARIANT, 187 TokenType.OBJECT, 188 TokenType.INET, 189 TokenType.ENUM, 190 *NESTED_TYPE_TOKENS, 191 } 192 193 SUBQUERY_PREDICATES = { 194 TokenType.ANY: exp.Any, 195 TokenType.ALL: exp.All, 196 TokenType.EXISTS: exp.Exists, 197 TokenType.SOME: exp.Any, 198 } 199 200 RESERVED_KEYWORDS = { 201 *Tokenizer.SINGLE_TOKENS.values(), 202 TokenType.SELECT, 203 } 204 205 DB_CREATABLES = { 206 TokenType.DATABASE, 207 TokenType.SCHEMA, 208 TokenType.TABLE, 209 TokenType.VIEW, 210 TokenType.DICTIONARY, 211 } 212 213 CREATABLES = { 214 TokenType.COLUMN, 215 
        # Tail of the CREATABLES set (the set literal opens on an earlier line).
        TokenType.FUNCTION,
        TokenType.INDEX,
        TokenType.PROCEDURE,
        *DB_CREATABLES,
    }

    # Tokens that can represent identifiers
    ID_VAR_TOKENS = {
        TokenType.VAR,
        TokenType.ANTI,
        TokenType.APPLY,
        TokenType.ASC,
        TokenType.AUTO_INCREMENT,
        TokenType.BEGIN,
        TokenType.CACHE,
        TokenType.CASE,
        TokenType.COLLATE,
        TokenType.COMMAND,
        TokenType.COMMENT,
        TokenType.COMMIT,
        TokenType.CONSTRAINT,
        TokenType.DEFAULT,
        TokenType.DELETE,
        TokenType.DESC,
        TokenType.DESCRIBE,
        TokenType.DICTIONARY,
        TokenType.DIV,
        TokenType.END,
        TokenType.EXECUTE,
        TokenType.ESCAPE,
        TokenType.FALSE,
        TokenType.FIRST,
        TokenType.FILTER,
        TokenType.FORMAT,
        TokenType.FULL,
        TokenType.IF,
        TokenType.IS,
        TokenType.ISNULL,
        TokenType.INTERVAL,
        TokenType.KEEP,
        TokenType.LEFT,
        TokenType.LOAD,
        TokenType.MERGE,
        TokenType.NATURAL,
        TokenType.NEXT,
        TokenType.OFFSET,
        TokenType.ORDINALITY,
        TokenType.OVERWRITE,
        TokenType.PARTITION,
        TokenType.PERCENT,
        TokenType.PIVOT,
        TokenType.PRAGMA,
        TokenType.RANGE,
        TokenType.REFERENCES,
        TokenType.RIGHT,
        TokenType.ROW,
        TokenType.ROWS,
        TokenType.SEMI,
        TokenType.SET,
        TokenType.SETTINGS,
        TokenType.SHOW,
        TokenType.TEMPORARY,
        TokenType.TOP,
        TokenType.TRUE,
        TokenType.UNIQUE,
        TokenType.UNPIVOT,
        TokenType.UPDATE,
        TokenType.VOLATILE,
        TokenType.WINDOW,
        *CREATABLES,
        *SUBQUERY_PREDICATES,
        *TYPE_TOKENS,
        *NO_PAREN_FUNCTIONS,
    }

    # Identifier tokens allowed inside an INTERVAL expression (END would be ambiguous).
    INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END}

    # Identifier tokens allowed as table aliases; join/window keywords are excluded
    # so that e.g. "FROM t LEFT JOIN ..." does not parse LEFT as an alias.
    TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - {
        TokenType.APPLY,
        TokenType.ASOF,
        TokenType.FULL,
        TokenType.LEFT,
        TokenType.LOCK,
        TokenType.NATURAL,
        TokenType.OFFSET,
        TokenType.RIGHT,
        TokenType.WINDOW,
    }

    # Alias tokens valid in COMMENT ON targets (IS introduces the comment text itself).
    COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS}

    # Alias tokens valid in UPDATE statements (SET starts the assignment list).
    UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET}

    TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"}

    # Tokens that may appear where a function name is expected.
    FUNC_TOKENS = {
        TokenType.COMMAND,
        TokenType.CURRENT_DATE,
        TokenType.CURRENT_DATETIME,
        TokenType.CURRENT_TIMESTAMP,
        TokenType.CURRENT_TIME,
        TokenType.CURRENT_USER,
        TokenType.FILTER,
        TokenType.FIRST,
        TokenType.FORMAT,
        TokenType.GLOB,
        TokenType.IDENTIFIER,
        TokenType.INDEX,
        TokenType.ISNULL,
        TokenType.ILIKE,
        TokenType.LIKE,
        TokenType.MERGE,
        TokenType.OFFSET,
        TokenType.PRIMARY_KEY,
        TokenType.RANGE,
        TokenType.REPLACE,
        TokenType.ROW,
        TokenType.UNNEST,
        TokenType.VAR,
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.DATE,
        TokenType.DATETIME,
        TokenType.TABLE,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.WINDOW,
        *TYPE_TOKENS,
        *SUBQUERY_PREDICATES,
    }

    # Binary operator token -> expression-class tables, one per precedence tier
    # (presumably consumed by the generic binary-operator parsing helpers).
    CONJUNCTION = {
        TokenType.AND: exp.And,
        TokenType.OR: exp.Or,
    }

    EQUALITY = {
        TokenType.EQ: exp.EQ,
        TokenType.NEQ: exp.NEQ,
        TokenType.NULLSAFE_EQ: exp.NullSafeEQ,
    }

    COMPARISON = {
        TokenType.GT: exp.GT,
        TokenType.GTE: exp.GTE,
        TokenType.LT: exp.LT,
        TokenType.LTE: exp.LTE,
    }

    BITWISE = {
        TokenType.AMP: exp.BitwiseAnd,
        TokenType.CARET: exp.BitwiseXor,
        TokenType.PIPE: exp.BitwiseOr,
        TokenType.DPIPE: exp.DPipe,
    }

    TERM = {
        TokenType.DASH: exp.Sub,
        TokenType.PLUS: exp.Add,
        TokenType.MOD: exp.Mod,
        TokenType.COLLATE: exp.Collate,
    }

    FACTOR = {
        TokenType.DIV: exp.IntDiv,
        TokenType.LR_ARROW: exp.Distance,
        TokenType.SLASH: exp.Div,
        TokenType.STAR: exp.Mul,
    }

    TIMESTAMPS = {
        TokenType.TIME,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
    }

    SET_OPERATIONS = {
        TokenType.UNION,
        TokenType.INTERSECT,
        TokenType.EXCEPT,
    }

    JOIN_METHODS = {
        TokenType.NATURAL,
        TokenType.ASOF,
    }

    JOIN_SIDES = {
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.FULL,
    }

    JOIN_KINDS = {
        TokenType.INNER,
        TokenType.OUTER,
        TokenType.CROSS,
        TokenType.SEMI,
        TokenType.ANTI,
    }

    JOIN_HINTS: t.Set[str] = set()

    # Lambda-syntax parsers: `args -> body` and `arg => value` (kwarg) forms.
    LAMBDAS = {
        TokenType.ARROW: lambda self, expressions: self.expression(
            exp.Lambda,
            this=self._replace_lambda(
                self._parse_conjunction(),
                {node.name for node in expressions},
            ),
            expressions=expressions,
        ),
        TokenType.FARROW: lambda self, expressions: self.expression(
            exp.Kwarg,
            this=exp.var(expressions[0].name),
            expression=self._parse_conjunction(),
        ),
    }

    # Postfix operators that bind to a column: casts (::) and JSON/JSONB extraction arrows.
    # The DOT entry is None because member access is handled specially, not via a lambda.
    COLUMN_OPERATORS = {
        TokenType.DOT: None,
        TokenType.DCOLON: lambda self, this, to: self.expression(
            exp.Cast if self.STRICT_CAST else exp.TryCast,
            this=this,
            to=to,
        ),
        TokenType.ARROW: lambda self, this, path: self.expression(
            exp.JSONExtract,
            this=this,
            expression=path,
        ),
        TokenType.DARROW: lambda self, this, path: self.expression(
            exp.JSONExtractScalar,
            this=this,
            expression=path,
        ),
        TokenType.HASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtract,
            this=this,
            expression=path,
        ),
        TokenType.DHASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtractScalar,
            this=this,
            expression=path,
        ),
        TokenType.PLACEHOLDER: lambda self, this, key: self.expression(
            exp.JSONBContains,
            this=this,
            expression=key,
        ),
    }

    # Expression class -> parser callable; used by parse_into to target a specific node type.
    EXPRESSION_PARSERS = {
        exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
        exp.Column: lambda self: self._parse_column(),
        exp.Condition: lambda self: self._parse_conjunction(),
        exp.DataType: lambda self: self._parse_types(),
        exp.Expression: lambda self: self._parse_statement(),
        exp.From: lambda self: self._parse_from(),
        exp.Group: lambda self: self._parse_group(),
        exp.Having: lambda self: self._parse_having(),
        exp.Identifier: lambda self: self._parse_id_var(),
        exp.Join: lambda self: self._parse_join(),
        exp.Lambda: lambda self: self._parse_lambda(),
        exp.Lateral: lambda self: self._parse_lateral(),
        exp.Limit: lambda self: self._parse_limit(),
        exp.Offset: lambda self: self._parse_offset(),
        exp.Order: lambda self: self._parse_order(),
        exp.Ordered: lambda self: self._parse_ordered(),
        exp.Properties: lambda self: self._parse_properties(),
        exp.Qualify: lambda self: self._parse_qualify(),
        exp.Returning: lambda self: self._parse_returning(),
        exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY),
        exp.Table: lambda self: self._parse_table_parts(),
        exp.TableAlias: lambda self: self._parse_table_alias(),
        exp.Where: lambda self: self._parse_where(),
        exp.Window: lambda self: self._parse_named_window(),
        exp.With: lambda self: self._parse_with(),
        "JOIN_TYPE": lambda self: self._parse_join_parts(),
    }

    # Leading statement token -> parser callable (dispatched from _parse_statement).
    STATEMENT_PARSERS = {
        TokenType.ALTER: lambda self: self._parse_alter(),
        TokenType.BEGIN: lambda self: self._parse_transaction(),
        TokenType.CACHE: lambda self: self._parse_cache(),
        TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(),
        TokenType.COMMENT: lambda self: self._parse_comment(),
        TokenType.CREATE: lambda self: self._parse_create(),
        TokenType.DELETE: lambda self: self._parse_delete(),
        TokenType.DESC: lambda self: self._parse_describe(),
        TokenType.DESCRIBE: lambda self: self._parse_describe(),
        TokenType.DROP: lambda self: self._parse_drop(),
        TokenType.END: lambda self: self._parse_commit_or_rollback(),
        TokenType.FROM: lambda self: exp.select("*").from_(
            t.cast(exp.From, self._parse_from(skip_from_token=True))
        ),
        TokenType.INSERT: lambda self: self._parse_insert(),
        TokenType.LOAD: lambda self: self._parse_load(),
        TokenType.MERGE: lambda self: self._parse_merge(),
        TokenType.PIVOT: lambda self: self._parse_simplified_pivot(),
        TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()),
        TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(),
        TokenType.SET: lambda self: self._parse_set(),
        TokenType.UNCACHE: lambda self: self._parse_uncache(),
        TokenType.UPDATE: lambda self: self._parse_update(),
        TokenType.USE: lambda self: self.expression(
            exp.Use,
            kind=self._match_texts(("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA"))
            and exp.var(self._prev.text),
            this=self._parse_table(schema=False),
        ),
    }

    UNARY_PARSERS = {
        TokenType.PLUS: lambda self: self._parse_unary(),  # Unary + is handled as a no-op
        TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()),
        TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()),
        TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()),
    }

    # Literal/terminal token -> parser callable; each receives the matched token.
    PRIMARY_PARSERS = {
        TokenType.STRING: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=True
        ),
        TokenType.NUMBER: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=False
        ),
        TokenType.STAR: lambda self, _: self.expression(
            exp.Star, **{"except": self._parse_except(), "replace": self._parse_replace()}
        ),
        TokenType.NULL: lambda self, _: self.expression(exp.Null),
        TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True),
        TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False),
        TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text),
        TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text),
        TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text),
        TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token),
        TokenType.NATIONAL_STRING: lambda self, token: self.expression(
            exp.National, this=token.text
        ),
        TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text),
        TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(),
    }

    PLACEHOLDER_PARSERS = {
        TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder),
        TokenType.PARAMETER: lambda self: self._parse_parameter(),
        TokenType.COLON: lambda self: self.expression(exp.Placeholder, this=self._prev.text)
        if self._match_set((TokenType.NUMBER, TokenType.VAR))
        else None,
    }

    # Range/predicate operator token -> parser callable (BETWEEN, IN, IS, LIKE family, ...).
    RANGE_PARSERS = {
        TokenType.BETWEEN: lambda self, this: self._parse_between(this),
        TokenType.GLOB: binary_range_parser(exp.Glob),
        TokenType.ILIKE: binary_range_parser(exp.ILike),
        TokenType.IN: lambda self, this: self._parse_in(this),
        TokenType.IRLIKE: binary_range_parser(exp.RegexpILike),
        TokenType.IS: lambda self, this: self._parse_is(this),
        TokenType.LIKE: binary_range_parser(exp.Like),
        TokenType.OVERLAPS: binary_range_parser(exp.Overlaps),
        TokenType.RLIKE: binary_range_parser(exp.RegexpLike),
        TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo),
    }

    # DDL property keyword -> parser callable. Keys are upper-cased keyword text,
    # possibly multi-word; some parsers accept **kwargs forwarded by _parse_property_before.
    PROPERTY_PARSERS: t.Dict[str, t.Callable] = {
        "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty),
        "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty),
        "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(),
        "CHARACTER SET": lambda self: self._parse_character_set(),
        "CHECKSUM": lambda self: self._parse_checksum(),
        "CLUSTER BY": lambda self: self._parse_cluster(),
        "CLUSTERED": lambda self: self._parse_clustered_by(),
        "COLLATE": lambda self: self._parse_property_assignment(exp.CollateProperty),
        "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty),
        "COPY": lambda self: self._parse_copy_property(),
        "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs),
        "DEFINER": lambda self: self._parse_definer(),
        "DETERMINISTIC": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "DISTKEY": lambda self: self._parse_distkey(),
        "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty),
        "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty),
        "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty),
        "EXTERNAL": lambda self: self.expression(exp.ExternalProperty),
        "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs),
        "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "FREESPACE": lambda self: self._parse_freespace(),
        "IMMUTABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs),
        "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty),
        "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"),
        "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"),
        "LIKE": lambda self: self._parse_create_like(),
        "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty),
        "LOCK": lambda self: self._parse_locking(),
        "LOCKING": lambda self: self._parse_locking(),
        "LOG": lambda self, **kwargs: self._parse_log(**kwargs),
        "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty),
        "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs),
        "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True),
        "NO": lambda self: self._parse_no_property(),
        "ON": lambda self: self._parse_on_property(),
        "ORDER BY": lambda self: self._parse_order(skip_order_token=True),
        "PARTITION BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED_BY": lambda self: self._parse_partitioned_by(),
        "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True),
        "RANGE": lambda self: self._parse_dict_range(this="RANGE"),
        "RETURNS": lambda self: self._parse_returns(),
        "ROW": lambda self: self._parse_row(),
        "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty),
        "SET": lambda self: self.expression(exp.SetProperty, multi=False),
        "SETTINGS": lambda self: self.expression(
            exp.SettingsProperty, expressions=self._parse_csv(self._parse_set_item)
        ),
        "SORTKEY": lambda self: self._parse_sortkey(),
        "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"),
        "STABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("STABLE")
        ),
        "STORED": lambda self: self._parse_stored(),
        "TBLPROPERTIES": lambda self: self._parse_wrapped_csv(self._parse_property),
        "TEMP": lambda self: self.expression(exp.TemporaryProperty),
        "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty),
        "TO": lambda self: self._parse_to_table(),
        "TRANSIENT": lambda self: self.expression(exp.TransientProperty),
        "TTL": lambda self: self._parse_ttl(),
        "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "VOLATILE": lambda self: self._parse_volatile_property(),
        "WITH": lambda self: self._parse_with_property(),
    }

    # Column-constraint keyword -> parser callable.
    CONSTRAINT_PARSERS = {
        "AUTOINCREMENT": lambda self: self._parse_auto_increment(),
        "AUTO_INCREMENT": lambda self: self._parse_auto_increment(),
        "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False),
        "CHARACTER SET": lambda self: self.expression(
            exp.CharacterSetColumnConstraint, this=self._parse_var_or_string()
        ),
        "CHECK": lambda self: self.expression(
            exp.CheckColumnConstraint, this=self._parse_wrapped(self._parse_conjunction)
        ),
        "COLLATE": lambda self: self.expression(
            exp.CollateColumnConstraint, this=self._parse_var()
        ),
        "COMMENT": lambda self: self.expression(
            exp.CommentColumnConstraint, this=self._parse_string()
        ),
        "COMPRESS": lambda self: self._parse_compress(),
        "DEFAULT": lambda self: self.expression(
            exp.DefaultColumnConstraint, this=self._parse_bitwise()
        ),
        "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()),
        "FOREIGN KEY": lambda self: self._parse_foreign_key(),
        "FORMAT": lambda self: self.expression(
            exp.DateFormatColumnConstraint, this=self._parse_var_or_string()
        ),
        "GENERATED": lambda self: self._parse_generated_as_identity(),
        "IDENTITY": lambda self: self._parse_auto_increment(),
        "INLINE": lambda self: self._parse_inline(),
        "LIKE": lambda self: self._parse_create_like(),
        "NOT": lambda self: self._parse_not_constraint(),
        "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True),
        "ON": lambda self: self._match(TokenType.UPDATE)
        and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function()),
        "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()),
        "PRIMARY KEY": lambda self: self._parse_primary_key(),
        "REFERENCES": lambda self: self._parse_references(match=False),
        "TITLE": lambda self: self.expression(
            exp.TitleColumnConstraint, this=self._parse_var_or_string()
        ),
        "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]),
        "UNIQUE": lambda self: self._parse_unique(),
        "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint),
    }

    # ALTER TABLE action keyword -> parser callable.
    ALTER_PARSERS = {
        "ADD": lambda self: self._parse_alter_table_add(),
        "ALTER": lambda self: self._parse_alter_table_alter(),
        "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()),
        "DROP": lambda self: self._parse_alter_table_drop(),
        "RENAME": lambda self: self._parse_alter_table_rename(),
    }

    SCHEMA_UNNAMED_CONSTRAINTS = {"CHECK", "FOREIGN KEY", "LIKE", "PRIMARY KEY", "UNIQUE"}

    # Functions invoked without parentheses around their arguments (CASE, IF, ...).
    NO_PAREN_FUNCTION_PARSERS = {
        TokenType.ANY: lambda self: self.expression(exp.Any, this=self._parse_bitwise()),
        TokenType.CASE: lambda self: self._parse_case(),
        TokenType.IF: lambda self: self._parse_if(),
        TokenType.NEXT_VALUE_FOR: lambda self: self.expression(
            exp.NextValueFor,
            this=self._parse_column(),
            order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order),
        ),
    }

    FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"}

    # Functions whose argument lists need bespoke parsing (non-standard syntax).
    FUNCTION_PARSERS: t.Dict[str, t.Callable] = {
        "CAST": lambda self: self._parse_cast(self.STRICT_CAST),
        "CONCAT": lambda self: self._parse_concat(),
        "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST),
        "DECODE": lambda self: self._parse_decode(),
        "EXTRACT": lambda self: self._parse_extract(),
        "JSON_OBJECT": lambda self: self._parse_json_object(),
        "LOG": lambda self: self._parse_logarithm(),
        "MATCH": lambda self: self._parse_match_against(),
        "OPENJSON": lambda self: self._parse_open_json(),
        "POSITION": lambda self: self._parse_position(),
        "SAFE_CAST": lambda self: self._parse_cast(False),
        "STRING_AGG": lambda self: self._parse_string_agg(),
        "SUBSTRING": lambda self: self._parse_substring(),
        "TRIM": lambda self: self._parse_trim(),
        "TRY_CAST": lambda self: self._parse_cast(False),
        "TRY_CONVERT": lambda self: self._parse_convert(False),
    }

    # Query-modifier arg name -> parser callable; joins/laterals repeat until exhausted
    # (iter with a None sentinel collects every consecutive match).
    QUERY_MODIFIER_PARSERS = {
        "joins": lambda self: list(iter(self._parse_join, None)),
        "laterals": lambda self: list(iter(self._parse_lateral, None)),
        "match": lambda self: self._parse_match_recognize(),
        "where": lambda self: self._parse_where(),
        "group": lambda self: self._parse_group(),
        "having": lambda self: self._parse_having(),
        "qualify": lambda self: self._parse_qualify(),
        "windows": lambda self: self._parse_window_clause(),
        "order": lambda self: self._parse_order(),
        "limit": lambda self: self._parse_limit(),
        "offset": lambda self: self._parse_offset(),
        "locks": lambda self: self._parse_locks(),
        "sample": lambda self: self._parse_table_sample(as_modifier=True),
    }

    SET_PARSERS = {
        "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"),
        "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"),
        "SESSION": lambda self: self._parse_set_item_assignment("SESSION"),
        "TRANSACTION": lambda self: self._parse_set_transaction(),
    }

    # Empty by default; dialects populate these (SHOW_PARSERS also feeds SHOW_TRIE
    # via the _Parser metaclass).
    SHOW_PARSERS: t.Dict[str, t.Callable] = {}

    TYPE_LITERAL_PARSERS: t.Dict[exp.DataType.Type, t.Callable] = {}

    MODIFIABLES = (exp.Subquery, exp.Subqueryable, exp.Table)

    DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN}

    PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE}

    TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"}
    TRANSACTION_CHARACTERISTICS = {
        "ISOLATION LEVEL REPEATABLE READ",
        "ISOLATION LEVEL READ COMMITTED",
        "ISOLATION LEVEL READ UNCOMMITTED",
        "ISOLATION LEVEL SERIALIZABLE",
        "READ WRITE",
        "READ ONLY",
    }

    INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"}

    CLONE_KINDS = {"TIMESTAMP", "OFFSET", "STATEMENT"}

    TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE}

    WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS}
    WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER}
    WINDOW_SIDES = {"FOLLOWING", "PRECEDING"}

    ADD_CONSTRAINT_TOKENS = {TokenType.CONSTRAINT, TokenType.PRIMARY_KEY, TokenType.FOREIGN_KEY}

    # Whether :: casts produce exp.Cast (True) or exp.TryCast (False) — see COLUMN_OPERATORS.
    STRICT_CAST = True

    # A NULL arg in CONCAT yields NULL by default
    CONCAT_NULL_OUTPUTS_STRING = False

    PREFIXED_PIVOT_COLUMNS = False
False 800 IDENTIFY_PIVOT_STRINGS = False 801 802 LOG_BASE_FIRST = True 803 LOG_DEFAULTS_TO_LN = False 804 805 __slots__ = ( 806 "error_level", 807 "error_message_context", 808 "max_errors", 809 "sql", 810 "errors", 811 "_tokens", 812 "_index", 813 "_curr", 814 "_next", 815 "_prev", 816 "_prev_comments", 817 ) 818 819 # Autofilled 820 INDEX_OFFSET: int = 0 821 UNNEST_COLUMN_ONLY: bool = False 822 ALIAS_POST_TABLESAMPLE: bool = False 823 STRICT_STRING_CONCAT = False 824 NULL_ORDERING: str = "nulls_are_small" 825 SHOW_TRIE: t.Dict = {} 826 SET_TRIE: t.Dict = {} 827 FORMAT_MAPPING: t.Dict[str, str] = {} 828 FORMAT_TRIE: t.Dict = {} 829 TIME_MAPPING: t.Dict[str, str] = {} 830 TIME_TRIE: t.Dict = {} 831 832 def __init__( 833 self, 834 error_level: t.Optional[ErrorLevel] = None, 835 error_message_context: int = 100, 836 max_errors: int = 3, 837 ): 838 self.error_level = error_level or ErrorLevel.IMMEDIATE 839 self.error_message_context = error_message_context 840 self.max_errors = max_errors 841 self.reset() 842 843 def reset(self): 844 self.sql = "" 845 self.errors = [] 846 self._tokens = [] 847 self._index = 0 848 self._curr = None 849 self._next = None 850 self._prev = None 851 self._prev_comments = None 852 853 def parse( 854 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 855 ) -> t.List[t.Optional[exp.Expression]]: 856 """ 857 Parses a list of tokens and returns a list of syntax trees, one tree 858 per parsed SQL statement. 859 860 Args: 861 raw_tokens: The list of tokens. 862 sql: The original SQL string, used to produce helpful debug messages. 863 864 Returns: 865 The list of the produced syntax trees. 
866 """ 867 return self._parse( 868 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 869 ) 870 871 def parse_into( 872 self, 873 expression_types: exp.IntoType, 874 raw_tokens: t.List[Token], 875 sql: t.Optional[str] = None, 876 ) -> t.List[t.Optional[exp.Expression]]: 877 """ 878 Parses a list of tokens into a given Expression type. If a collection of Expression 879 types is given instead, this method will try to parse the token list into each one 880 of them, stopping at the first for which the parsing succeeds. 881 882 Args: 883 expression_types: The expression type(s) to try and parse the token list into. 884 raw_tokens: The list of tokens. 885 sql: The original SQL string, used to produce helpful debug messages. 886 887 Returns: 888 The target Expression. 889 """ 890 errors = [] 891 for expression_type in ensure_list(expression_types): 892 parser = self.EXPRESSION_PARSERS.get(expression_type) 893 if not parser: 894 raise TypeError(f"No parser registered for {expression_type}") 895 896 try: 897 return self._parse(parser, raw_tokens, sql) 898 except ParseError as e: 899 e.errors[0]["into_expression"] = expression_type 900 errors.append(e) 901 902 raise ParseError( 903 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 904 errors=merge_errors(errors), 905 ) from errors[-1] 906 907 def _parse( 908 self, 909 parse_method: t.Callable[[Parser], t.Optional[exp.Expression]], 910 raw_tokens: t.List[Token], 911 sql: t.Optional[str] = None, 912 ) -> t.List[t.Optional[exp.Expression]]: 913 self.reset() 914 self.sql = sql or "" 915 916 total = len(raw_tokens) 917 chunks: t.List[t.List[Token]] = [[]] 918 919 for i, token in enumerate(raw_tokens): 920 if token.token_type == TokenType.SEMICOLON: 921 if i < total - 1: 922 chunks.append([]) 923 else: 924 chunks[-1].append(token) 925 926 expressions = [] 927 928 for tokens in chunks: 929 self._index = -1 930 self._tokens = tokens 931 self._advance() 932 933 
expressions.append(parse_method(self)) 934 935 if self._index < len(self._tokens): 936 self.raise_error("Invalid expression / Unexpected token") 937 938 self.check_errors() 939 940 return expressions 941 942 def check_errors(self) -> None: 943 """Logs or raises any found errors, depending on the chosen error level setting.""" 944 if self.error_level == ErrorLevel.WARN: 945 for error in self.errors: 946 logger.error(str(error)) 947 elif self.error_level == ErrorLevel.RAISE and self.errors: 948 raise ParseError( 949 concat_messages(self.errors, self.max_errors), 950 errors=merge_errors(self.errors), 951 ) 952 953 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 954 """ 955 Appends an error in the list of recorded errors or raises it, depending on the chosen 956 error level setting. 957 """ 958 token = token or self._curr or self._prev or Token.string("") 959 start = token.start 960 end = token.end + 1 961 start_context = self.sql[max(start - self.error_message_context, 0) : start] 962 highlight = self.sql[start:end] 963 end_context = self.sql[end : end + self.error_message_context] 964 965 error = ParseError.new( 966 f"{message}. Line {token.line}, Col: {token.col}.\n" 967 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 968 description=message, 969 line=token.line, 970 col=token.col, 971 start_context=start_context, 972 highlight=highlight, 973 end_context=end_context, 974 ) 975 976 if self.error_level == ErrorLevel.IMMEDIATE: 977 raise error 978 979 self.errors.append(error) 980 981 def expression( 982 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 983 ) -> E: 984 """ 985 Creates a new, validated Expression. 986 987 Args: 988 exp_class: The expression class to instantiate. 989 comments: An optional list of comments to attach to the expression. 990 kwargs: The arguments to set for the expression along with their respective values. 991 992 Returns: 993 The target expression. 
994 """ 995 instance = exp_class(**kwargs) 996 instance.add_comments(comments) if comments else self._add_comments(instance) 997 return self.validate_expression(instance) 998 999 def _add_comments(self, expression: t.Optional[exp.Expression]) -> None: 1000 if expression and self._prev_comments: 1001 expression.add_comments(self._prev_comments) 1002 self._prev_comments = None 1003 1004 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1005 """ 1006 Validates an Expression, making sure that all its mandatory arguments are set. 1007 1008 Args: 1009 expression: The expression to validate. 1010 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1011 1012 Returns: 1013 The validated expression. 1014 """ 1015 if self.error_level != ErrorLevel.IGNORE: 1016 for error_message in expression.error_messages(args): 1017 self.raise_error(error_message) 1018 1019 return expression 1020 1021 def _find_sql(self, start: Token, end: Token) -> str: 1022 return self.sql[start.start : end.end + 1] 1023 1024 def _advance(self, times: int = 1) -> None: 1025 self._index += times 1026 self._curr = seq_get(self._tokens, self._index) 1027 self._next = seq_get(self._tokens, self._index + 1) 1028 1029 if self._index > 0: 1030 self._prev = self._tokens[self._index - 1] 1031 self._prev_comments = self._prev.comments 1032 else: 1033 self._prev = None 1034 self._prev_comments = None 1035 1036 def _retreat(self, index: int) -> None: 1037 if index != self._index: 1038 self._advance(index - self._index) 1039 1040 def _parse_command(self) -> exp.Command: 1041 return self.expression(exp.Command, this=self._prev.text, expression=self._parse_string()) 1042 1043 def _parse_comment(self, allow_exists: bool = True) -> exp.Expression: 1044 start = self._prev 1045 exists = self._parse_exists() if allow_exists else None 1046 1047 self._match(TokenType.ON) 1048 1049 kind = self._match_set(self.CREATABLES) and self._prev 1050 if not kind: 
1051 return self._parse_as_command(start) 1052 1053 if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1054 this = self._parse_user_defined_function(kind=kind.token_type) 1055 elif kind.token_type == TokenType.TABLE: 1056 this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS) 1057 elif kind.token_type == TokenType.COLUMN: 1058 this = self._parse_column() 1059 else: 1060 this = self._parse_id_var() 1061 1062 self._match(TokenType.IS) 1063 1064 return self.expression( 1065 exp.Comment, this=this, kind=kind.text, expression=self._parse_string(), exists=exists 1066 ) 1067 1068 def _parse_to_table( 1069 self, 1070 ) -> exp.ToTableProperty: 1071 table = self._parse_table_parts(schema=True) 1072 return self.expression(exp.ToTableProperty, this=table) 1073 1074 # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl 1075 def _parse_ttl(self) -> exp.Expression: 1076 def _parse_ttl_action() -> t.Optional[exp.Expression]: 1077 this = self._parse_bitwise() 1078 1079 if self._match_text_seq("DELETE"): 1080 return self.expression(exp.MergeTreeTTLAction, this=this, delete=True) 1081 if self._match_text_seq("RECOMPRESS"): 1082 return self.expression( 1083 exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise() 1084 ) 1085 if self._match_text_seq("TO", "DISK"): 1086 return self.expression( 1087 exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string() 1088 ) 1089 if self._match_text_seq("TO", "VOLUME"): 1090 return self.expression( 1091 exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string() 1092 ) 1093 1094 return this 1095 1096 expressions = self._parse_csv(_parse_ttl_action) 1097 where = self._parse_where() 1098 group = self._parse_group() 1099 1100 aggregates = None 1101 if group and self._match(TokenType.SET): 1102 aggregates = self._parse_csv(self._parse_set_item) 1103 1104 return self.expression( 1105 exp.MergeTreeTTL, 1106 expressions=expressions, 1107 where=where, 1108 
group=group, 1109 aggregates=aggregates, 1110 ) 1111 1112 def _parse_statement(self) -> t.Optional[exp.Expression]: 1113 if self._curr is None: 1114 return None 1115 1116 if self._match_set(self.STATEMENT_PARSERS): 1117 return self.STATEMENT_PARSERS[self._prev.token_type](self) 1118 1119 if self._match_set(Tokenizer.COMMANDS): 1120 return self._parse_command() 1121 1122 expression = self._parse_expression() 1123 expression = self._parse_set_operations(expression) if expression else self._parse_select() 1124 return self._parse_query_modifiers(expression) 1125 1126 def _parse_drop(self) -> exp.Drop | exp.Command: 1127 start = self._prev 1128 temporary = self._match(TokenType.TEMPORARY) 1129 materialized = self._match_text_seq("MATERIALIZED") 1130 1131 kind = self._match_set(self.CREATABLES) and self._prev.text 1132 if not kind: 1133 return self._parse_as_command(start) 1134 1135 return self.expression( 1136 exp.Drop, 1137 exists=self._parse_exists(), 1138 this=self._parse_table(schema=True), 1139 kind=kind, 1140 temporary=temporary, 1141 materialized=materialized, 1142 cascade=self._match_text_seq("CASCADE"), 1143 constraints=self._match_text_seq("CONSTRAINTS"), 1144 purge=self._match_text_seq("PURGE"), 1145 ) 1146 1147 def _parse_exists(self, not_: bool = False) -> t.Optional[bool]: 1148 return ( 1149 self._match(TokenType.IF) 1150 and (not not_ or self._match(TokenType.NOT)) 1151 and self._match(TokenType.EXISTS) 1152 ) 1153 1154 def _parse_create(self) -> exp.Create | exp.Command: 1155 # Note: this can't be None because we've matched a statement parser 1156 start = self._prev 1157 replace = start.text.upper() == "REPLACE" or self._match_pair( 1158 TokenType.OR, TokenType.REPLACE 1159 ) 1160 unique = self._match(TokenType.UNIQUE) 1161 1162 if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False): 1163 self._advance() 1164 1165 properties = None 1166 create_token = self._match_set(self.CREATABLES) and self._prev 1167 1168 if not create_token: 1169 # 
exp.Properties.Location.POST_CREATE 1170 properties = self._parse_properties() 1171 create_token = self._match_set(self.CREATABLES) and self._prev 1172 1173 if not properties or not create_token: 1174 return self._parse_as_command(start) 1175 1176 exists = self._parse_exists(not_=True) 1177 this = None 1178 expression = None 1179 indexes = None 1180 no_schema_binding = None 1181 begin = None 1182 clone = None 1183 1184 def extend_props(temp_props: t.Optional[exp.Properties]) -> None: 1185 nonlocal properties 1186 if properties and temp_props: 1187 properties.expressions.extend(temp_props.expressions) 1188 elif temp_props: 1189 properties = temp_props 1190 1191 if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1192 this = self._parse_user_defined_function(kind=create_token.token_type) 1193 1194 # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature) 1195 extend_props(self._parse_properties()) 1196 1197 self._match(TokenType.ALIAS) 1198 begin = self._match(TokenType.BEGIN) 1199 return_ = self._match_text_seq("RETURN") 1200 expression = self._parse_statement() 1201 1202 if return_: 1203 expression = self.expression(exp.Return, this=expression) 1204 elif create_token.token_type == TokenType.INDEX: 1205 this = self._parse_index(index=self._parse_id_var()) 1206 elif create_token.token_type in self.DB_CREATABLES: 1207 table_parts = self._parse_table_parts(schema=True) 1208 1209 # exp.Properties.Location.POST_NAME 1210 self._match(TokenType.COMMA) 1211 extend_props(self._parse_properties(before=True)) 1212 1213 this = self._parse_schema(this=table_parts) 1214 1215 # exp.Properties.Location.POST_SCHEMA and POST_WITH 1216 extend_props(self._parse_properties()) 1217 1218 self._match(TokenType.ALIAS) 1219 if not self._match_set(self.DDL_SELECT_TOKENS, advance=False): 1220 # exp.Properties.Location.POST_ALIAS 1221 extend_props(self._parse_properties()) 1222 1223 expression = self._parse_ddl_select() 1224 1225 if 
    def _parse_property_before(self) -> t.Optional[exp.Expression]:
        """Parse a comma-led property that precedes the object name.

        Returns the parsed property expression, or None if no known property
        keyword follows.
        """
        # only used for teradata currently
        self._match(TokenType.COMMA)

        # NOTE: dict-literal evaluation order matters here — every _match_* call
        # consumes tokens, so these modifiers are recognized strictly in this order.
        kwargs = {
            "no": self._match_text_seq("NO"),
            "dual": self._match_text_seq("DUAL"),
            "before": self._match_text_seq("BEFORE"),
            "default": self._match_text_seq("DEFAULT"),
            "local": (self._match_text_seq("LOCAL") and "LOCAL")
            or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"),
            "after": self._match_text_seq("AFTER"),
            "minimum": self._match_texts(("MIN", "MINIMUM")),
            "maximum": self._match_texts(("MAX", "MAXIMUM")),
        }

        if self._match_texts(self.PROPERTY_PARSERS):
            parser = self.PROPERTY_PARSERS[self._prev.text.upper()]
            try:
                # Forward only the modifiers that actually matched; a parser that
                # doesn't accept one of them raises TypeError, which we surface
                # as a parse error.
                return parser(self, **{k: v for k, v in kwargs.items() if v})
            except TypeError:
                self.raise_error(f"Cannot parse property '{self._prev.text}'")

        return None

    def _parse_property(self) -> t.Optional[exp.Expression]:
        """Parse a single table/view property, trying each known form in order.

        Returns None when the upcoming tokens do not start a property.
        """
        if self._match_texts(self.PROPERTY_PARSERS):
            return self.PROPERTY_PARSERS[self._prev.text.upper()](self)

        if self._match_pair(TokenType.DEFAULT, TokenType.CHARACTER_SET):
            return self._parse_character_set(default=True)

        if self._match_text_seq("COMPOUND", "SORTKEY"):
            return self._parse_sortkey(compound=True)

        if self._match_text_seq("SQL", "SECURITY"):
            return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER"))

        # Generic `key = value` assignment, where key is an identifier or string.
        # advance=False so the key token is still available for _parse_var_or_string.
        assignment = self._match_pair(
            TokenType.VAR, TokenType.EQ, advance=False
        ) or self._match_pair(TokenType.STRING, TokenType.EQ, advance=False)

        if assignment:
            key = self._parse_var_or_string()
            self._match(TokenType.EQ)
            return self.expression(exp.Property, this=key, value=self._parse_column())

        return None

    def _parse_stored(self) -> exp.FileFormatProperty:
        """Parse a STORED AS file-format property (Hive-style), including the
        optional INPUTFORMAT/OUTPUTFORMAT pair."""
        self._match(TokenType.ALIAS)

        input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None
        output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None

        return self.expression(
            exp.FileFormatProperty,
            this=self.expression(
                exp.InputOutputFormat, input_format=input_format, output_format=output_format
            )
            if input_format or output_format
            else self._parse_var_or_string() or self._parse_number() or self._parse_id_var(),
        )

    def _parse_property_assignment(self, exp_class: t.Type[E]) -> E:
        """Parse an optional `=` / `AS` then a field value into *exp_class*."""
        self._match(TokenType.EQ)
        self._match(TokenType.ALIAS)
        return self.expression(exp_class, this=self._parse_field())

    def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]:
        """Parse consecutive properties into an exp.Properties node.

        Args:
            before: when truthy, use the Teradata "before the name" grammar.

        Returns None if no property was found at the current position.
        """
        properties = []
        while True:
            if before:
                prop = self._parse_property_before()
            else:
                prop = self._parse_property()

            if not prop:
                break
            # A single parse may yield one property or a list of them.
            for p in ensure_list(prop):
                properties.append(p)

        if properties:
            return self.expression(exp.Properties, expressions=properties)

        return None

    def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty:
        """Parse a [NO] FALLBACK [PROTECTION] property (keywords before
        PROTECTION were consumed by the caller)."""
        return self.expression(
            exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION")
        )

    def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty:
        """Disambiguate VOLATILE: a table property when it directly follows a
        creatable keyword, otherwise a function stability marker."""
        if self._index >= 2:
            pre_volatile_token = self._tokens[self._index - 2]
        else:
            pre_volatile_token = None

        if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS:
            return exp.VolatileProperty()

        return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE"))

    def _parse_with_property(
        self,
    ) -> t.Optional[exp.Expression] | t.List[t.Optional[exp.Expression]]:
        """Parse the property (or property list) following a WITH keyword."""
        self._match(TokenType.WITH)
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_property)

        if self._match_text_seq("JOURNAL"):
            return self._parse_withjournaltable()

        if self._match_text_seq("DATA"):
            return self._parse_withdata(no=False)
        elif self._match_text_seq("NO", "DATA"):
            return self._parse_withdata(no=True)

        if not self._next:
            return None

        return self._parse_withisolatedloading()
    def _parse_withjournaltable(self) -> exp.WithJournalTableProperty:
        """Parse the WITH JOURNAL TABLE = <table> property (Teradata)."""
        self._match(TokenType.TABLE)
        self._match(TokenType.EQ)
        return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts())

    def _parse_log(self, no: bool = False) -> exp.LogProperty:
        """Build a [NO] LOG property; the keywords were consumed by the caller."""
        return self.expression(exp.LogProperty, no=no)

    def _parse_journal(self, **kwargs) -> exp.JournalProperty:
        """Build a JOURNAL property from modifiers matched upstream
        (see _parse_property_before's kwargs)."""
        return self.expression(exp.JournalProperty, **kwargs)

    def _parse_checksum(self) -> exp.ChecksumProperty:
        """Parse CHECKSUM = ON | OFF [DEFAULT]; `on` stays None if neither matched."""
        self._match(TokenType.EQ)

        on = None
        if self._match(TokenType.ON):
            on = True
        elif self._match_text_seq("OFF"):
            on = False

        return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT))

    def _parse_cluster(self) -> exp.Cluster:
        """Parse the ordered expression list of a CLUSTER BY clause."""
        return self.expression(exp.Cluster, expressions=self._parse_csv(self._parse_ordered))

    def _parse_clustered_by(self) -> exp.ClusteredByProperty:
        """Parse CLUSTERED BY (<cols>) [SORTED BY (<ordered>)] INTO <n> BUCKETS."""
        self._match_text_seq("BY")

        self._match_l_paren()
        expressions = self._parse_csv(self._parse_column)
        self._match_r_paren()

        if self._match_text_seq("SORTED", "BY"):
            self._match_l_paren()
            sorted_by = self._parse_csv(self._parse_ordered)
            self._match_r_paren()
        else:
            sorted_by = None

        self._match(TokenType.INTO)
        buckets = self._parse_number()
        self._match_text_seq("BUCKETS")

        return self.expression(
            exp.ClusteredByProperty,
            expressions=expressions,
            sorted_by=sorted_by,
            buckets=buckets,
        )

    def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]:
        """Parse COPY GRANTS; rewinds the already-consumed COPY token when
        GRANTS doesn't follow, so other parsers can retry."""
        if not self._match_text_seq("GRANTS"):
            self._retreat(self._index - 1)
            return None

        return self.expression(exp.CopyGrantsProperty)

    def _parse_freespace(self) -> exp.FreespaceProperty:
        """Parse FREESPACE = <number> [PERCENT] (Teradata)."""
        self._match(TokenType.EQ)
        return self.expression(
            exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT)
        )

    def _parse_mergeblockratio(
        self, no: bool = False, default: bool = False
    ) -> exp.MergeBlockRatioProperty:
        """Parse MERGEBLOCKRATIO, either `= <number> [PERCENT]` or the bare
        [NO|DEFAULT] form whose modifiers arrive via arguments."""
        if self._match(TokenType.EQ):
            return self.expression(
                exp.MergeBlockRatioProperty,
                this=self._parse_number(),
                percent=self._match(TokenType.PERCENT),
            )

        return self.expression(exp.MergeBlockRatioProperty, no=no, default=default)

    def _parse_datablocksize(
        self,
        default: t.Optional[bool] = None,
        minimum: t.Optional[bool] = None,
        maximum: t.Optional[bool] = None,
    ) -> exp.DataBlocksizeProperty:
        """Parse DATABLOCKSIZE [= <number>] [BYTES|KBYTES|KILOBYTES]."""
        self._match(TokenType.EQ)
        size = self._parse_number()

        units = None
        if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")):
            units = self._prev.text

        return self.expression(
            exp.DataBlocksizeProperty,
            size=size,
            units=units,
            default=default,
            minimum=minimum,
            maximum=maximum,
        )

    def _parse_blockcompression(self) -> exp.BlockCompressionProperty:
        """Parse BLOCKCOMPRESSION = ALWAYS|MANUAL|NEVER|DEFAULT [AUTOTEMP(...)]."""
        self._match(TokenType.EQ)
        always = self._match_text_seq("ALWAYS")
        manual = self._match_text_seq("MANUAL")
        never = self._match_text_seq("NEVER")
        default = self._match_text_seq("DEFAULT")

        autotemp = None
        if self._match_text_seq("AUTOTEMP"):
            autotemp = self._parse_schema()

        return self.expression(
            exp.BlockCompressionProperty,
            always=always,
            manual=manual,
            never=never,
            default=default,
            autotemp=autotemp,
        )

    def _parse_withisolatedloading(self) -> exp.IsolatedLoadingProperty:
        """Parse WITH [NO] [CONCURRENT] ISOLATED LOADING [FOR ALL|INSERT|NONE]."""
        no = self._match_text_seq("NO")
        concurrent = self._match_text_seq("CONCURRENT")
        self._match_text_seq("ISOLATED", "LOADING")
        for_all = self._match_text_seq("FOR", "ALL")
        for_insert = self._match_text_seq("FOR", "INSERT")
        for_none = self._match_text_seq("FOR", "NONE")
        return self.expression(
            exp.IsolatedLoadingProperty,
            no=no,
            concurrent=concurrent,
            for_all=for_all,
            for_insert=for_insert,
            for_none=for_none,
        )

    def _parse_locking(self) -> exp.LockingProperty:
        """Parse a Teradata LOCKING clause: target kind, optional target name,
        FOR/IN, lock type and optional OVERRIDE."""
        if self._match(TokenType.TABLE):
            kind = "TABLE"
        elif self._match(TokenType.VIEW):
            kind = "VIEW"
        elif self._match(TokenType.ROW):
            kind = "ROW"
        elif self._match_text_seq("DATABASE"):
            kind = "DATABASE"
        else:
            kind = None

        # ROW locks have no named target; the others do.
        if kind in ("DATABASE", "TABLE", "VIEW"):
            this = self._parse_table_parts()
        else:
            this = None

        if self._match(TokenType.FOR):
            for_or_in = "FOR"
        elif self._match(TokenType.IN):
            for_or_in = "IN"
        else:
            for_or_in = None

        if self._match_text_seq("ACCESS"):
            lock_type = "ACCESS"
        elif self._match_texts(("EXCL", "EXCLUSIVE")):
            lock_type = "EXCLUSIVE"
        elif self._match_text_seq("SHARE"):
            lock_type = "SHARE"
        elif self._match_text_seq("READ"):
            lock_type = "READ"
        elif self._match_text_seq("WRITE"):
            lock_type = "WRITE"
        elif self._match_text_seq("CHECKSUM"):
            lock_type = "CHECKSUM"
        else:
            lock_type = None

        override = self._match_text_seq("OVERRIDE")

        return self.expression(
            exp.LockingProperty,
            this=this,
            kind=kind,
            for_or_in=for_or_in,
            lock_type=lock_type,
            override=override,
        )

    def _parse_partition_by(self) -> t.List[t.Optional[exp.Expression]]:
        """Parse a PARTITION BY expression list; empty list when absent."""
        if self._match(TokenType.PARTITION_BY):
            return self._parse_csv(self._parse_conjunction)
        return []
    def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty:
        """Parse WITH [NO] DATA [AND [NO] STATISTICS] (the DATA part was
        consumed by the caller; `no` reports which form it saw)."""
        if self._match_text_seq("AND", "STATISTICS"):
            statistics = True
        elif self._match_text_seq("AND", "NO", "STATISTICS"):
            statistics = False
        else:
            statistics = None

        return self.expression(exp.WithDataProperty, no=no, statistics=statistics)

    def _parse_no_property(self) -> t.Optional[exp.NoPrimaryIndexProperty]:
        """Parse the property following NO; currently only NO PRIMARY INDEX."""
        if self._match_text_seq("PRIMARY", "INDEX"):
            return exp.NoPrimaryIndexProperty()
        return None

    def _parse_on_property(self) -> t.Optional[exp.Expression]:
        """Parse ON COMMIT PRESERVE|DELETE ROWS; None for other ON forms."""
        if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"):
            return exp.OnCommitProperty()
        elif self._match_text_seq("COMMIT", "DELETE", "ROWS"):
            return exp.OnCommitProperty(delete=True)
        return None

    def _parse_distkey(self) -> exp.DistKeyProperty:
        """Parse DISTKEY(<identifier>) (Redshift-style)."""
        return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var))

    def _parse_create_like(self) -> t.Optional[exp.LikeProperty]:
        """Parse LIKE <table> [INCLUDING|EXCLUDING <option>]... in CREATE TABLE.

        Returns None when an INCLUDING/EXCLUDING keyword is not followed by an
        identifier (malformed clause).
        """
        table = self._parse_table(schema=True)

        options = []
        while self._match_texts(("INCLUDING", "EXCLUDING")):
            this = self._prev.text.upper()

            id_var = self._parse_id_var()
            if not id_var:
                return None

            options.append(
                self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper()))
            )

        return self.expression(exp.LikeProperty, this=table, expressions=options)

    def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty:
        """Parse [COMPOUND] SORTKEY(<id list>) (Redshift-style)."""
        return self.expression(
            exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound
        )

    def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty:
        """Parse [DEFAULT] CHARACTER SET [=] <name>."""
        self._match(TokenType.EQ)
        return self.expression(
            exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default
        )

    def _parse_returns(self) -> exp.ReturnsProperty:
        """Parse a UDF RETURNS clause: a scalar type, TABLE<...>, or TABLE(...)."""
        value: t.Optional[exp.Expression]
        is_table = self._match(TokenType.TABLE)

        if is_table:
            if self._match(TokenType.LT):
                # RETURNS TABLE<col type, ...> (angle-bracket struct form)
                value = self.expression(
                    exp.Schema,
                    this="TABLE",
                    expressions=self._parse_csv(self._parse_struct_types),
                )
                if not self._match(TokenType.GT):
                    self.raise_error("Expecting >")
            else:
                value = self._parse_schema(exp.var("TABLE"))
        else:
            value = self._parse_types()

        return self.expression(exp.ReturnsProperty, this=value, is_table=is_table)

    def _parse_describe(self) -> exp.Describe:
        """Parse a DESCRIBE statement with an optional creatable kind."""
        kind = self._match_set(self.CREATABLES) and self._prev.text
        this = self._parse_table()
        return self.expression(exp.Describe, this=this, kind=kind)

    def _parse_insert(self) -> exp.Insert:
        """Parse an INSERT statement (table or DIRECTORY target).

        NOTE: the keyword arguments of the final self.expression call are
        evaluated left to right and each consumes tokens — do not reorder them.
        """
        overwrite = self._match(TokenType.OVERWRITE)
        local = self._match_text_seq("LOCAL")
        alternative = None

        if self._match_text_seq("DIRECTORY"):
            this: t.Optional[exp.Expression] = self.expression(
                exp.Directory,
                this=self._parse_var_or_string(),
                local=local,
                row_format=self._parse_row_format(match_row=True),
            )
        else:
            # INSERT OR REPLACE/IGNORE/... (sqlite-style alternatives)
            if self._match(TokenType.OR):
                alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text

            self._match(TokenType.INTO)
            self._match(TokenType.TABLE)
            this = self._parse_table(schema=True)

        return self.expression(
            exp.Insert,
            this=this,
            exists=self._parse_exists(),
            partition=self._parse_partition(),
            where=self._match_pair(TokenType.REPLACE, TokenType.WHERE)
            and self._parse_conjunction(),
            expression=self._parse_ddl_select(),
            conflict=self._parse_on_conflict(),
            returning=self._parse_returning(),
            overwrite=overwrite,
            alternative=alternative,
        )
    def _parse_returning(self) -> t.Optional[exp.Returning]:
        """Parse a RETURNING <columns> clause, or None when absent."""
        if not self._match(TokenType.RETURNING):
            return None

        return self.expression(exp.Returning, expressions=self._parse_csv(self._parse_column))

    def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]:
        """Parse the FORMAT part of ROW FORMAT (ROW was consumed by the caller)."""
        if not self._match(TokenType.FORMAT):
            return None
        return self._parse_row_format()

    def _parse_row_format(
        self, match_row: bool = False
    ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]:
        """Parse a Hive ROW FORMAT clause: SERDE '<name>' or DELIMITED options.

        Args:
            match_row: require and consume the leading ROW FORMAT pair.
        """
        if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT):
            return None

        if self._match_text_seq("SERDE"):
            return self.expression(exp.RowFormatSerdeProperty, this=self._parse_string())

        self._match_text_seq("DELIMITED")

        kwargs = {}

        # Each DELIMITED sub-option is optional but must appear in this order.
        if self._match_text_seq("FIELDS", "TERMINATED", "BY"):
            kwargs["fields"] = self._parse_string()
            if self._match_text_seq("ESCAPED", "BY"):
                kwargs["escaped"] = self._parse_string()
        if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"):
            kwargs["collection_items"] = self._parse_string()
        if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"):
            kwargs["map_keys"] = self._parse_string()
        if self._match_text_seq("LINES", "TERMINATED", "BY"):
            kwargs["lines"] = self._parse_string()
        if self._match_text_seq("NULL", "DEFINED", "AS"):
            kwargs["null"] = self._parse_string()

        return self.expression(exp.RowFormatDelimitedProperty, **kwargs)  # type: ignore

    def _parse_load(self) -> exp.LoadData | exp.Command:
        """Parse LOAD DATA ... INTO TABLE (Hive); other LOAD forms fall back to
        an opaque Command."""
        if self._match_text_seq("DATA"):
            local = self._match_text_seq("LOCAL")
            self._match_text_seq("INPATH")
            inpath = self._parse_string()
            overwrite = self._match(TokenType.OVERWRITE)
            self._match_pair(TokenType.INTO, TokenType.TABLE)

            return self.expression(
                exp.LoadData,
                this=self._parse_table(schema=True),
                local=local,
                overwrite=overwrite,
                inpath=inpath,
                partition=self._parse_partition(),
                input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(),
                serde=self._match_text_seq("SERDE") and self._parse_string(),
            )
        return self._parse_as_command(self._prev)

    def _parse_delete(self) -> exp.Delete:
        """Parse a DELETE statement.

        NOTE: keyword-argument evaluation order drives token consumption —
        this, using, where, returning, limit — do not reorder.
        """
        self._match(TokenType.FROM)

        return self.expression(
            exp.Delete,
            this=self._parse_table(),
            using=self._parse_csv(lambda: self._match(TokenType.USING) and self._parse_table()),
            where=self._parse_where(),
            returning=self._parse_returning(),
            limit=self._parse_limit(),
        )

    def _parse_update(self) -> exp.Update:
        """Parse an UPDATE statement (same ordered-kwargs caveat as _parse_delete)."""
        return self.expression(
            exp.Update,
            **{  # type: ignore
                "this": self._parse_table(alias_tokens=self.UPDATE_ALIAS_TOKENS),
                "expressions": self._match(TokenType.SET) and self._parse_csv(self._parse_equality),
                "from": self._parse_from(modifiers=True),
                "where": self._parse_where(),
                "returning": self._parse_returning(),
                "limit": self._parse_limit(),
            },
        )
1832 def _parse_uncache(self) -> exp.Uncache: 1833 if not self._match(TokenType.TABLE): 1834 self.raise_error("Expecting TABLE after UNCACHE") 1835 1836 return self.expression( 1837 exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True) 1838 ) 1839 1840 def _parse_cache(self) -> exp.Cache: 1841 lazy = self._match_text_seq("LAZY") 1842 self._match(TokenType.TABLE) 1843 table = self._parse_table(schema=True) 1844 1845 options = [] 1846 if self._match_text_seq("OPTIONS"): 1847 self._match_l_paren() 1848 k = self._parse_string() 1849 self._match(TokenType.EQ) 1850 v = self._parse_string() 1851 options = [k, v] 1852 self._match_r_paren() 1853 1854 self._match(TokenType.ALIAS) 1855 return self.expression( 1856 exp.Cache, 1857 this=table, 1858 lazy=lazy, 1859 options=options, 1860 expression=self._parse_select(nested=True), 1861 ) 1862 1863 def _parse_partition(self) -> t.Optional[exp.Partition]: 1864 if not self._match(TokenType.PARTITION): 1865 return None 1866 1867 return self.expression( 1868 exp.Partition, expressions=self._parse_wrapped_csv(self._parse_conjunction) 1869 ) 1870 1871 def _parse_value(self) -> exp.Tuple: 1872 if self._match(TokenType.L_PAREN): 1873 expressions = self._parse_csv(self._parse_conjunction) 1874 self._match_r_paren() 1875 return self.expression(exp.Tuple, expressions=expressions) 1876 1877 # In presto we can have VALUES 1, 2 which results in 1 column & 2 rows. 
1878 # Source: https://prestodb.io/docs/current/sql/values.html 1879 return self.expression(exp.Tuple, expressions=[self._parse_conjunction()]) 1880 1881 def _parse_select( 1882 self, nested: bool = False, table: bool = False, parse_subquery_alias: bool = True 1883 ) -> t.Optional[exp.Expression]: 1884 cte = self._parse_with() 1885 if cte: 1886 this = self._parse_statement() 1887 1888 if not this: 1889 self.raise_error("Failed to parse any statement following CTE") 1890 return cte 1891 1892 if "with" in this.arg_types: 1893 this.set("with", cte) 1894 else: 1895 self.raise_error(f"{this.key} does not support CTE") 1896 this = cte 1897 elif self._match(TokenType.SELECT): 1898 comments = self._prev_comments 1899 1900 hint = self._parse_hint() 1901 all_ = self._match(TokenType.ALL) 1902 distinct = self._match(TokenType.DISTINCT) 1903 1904 kind = ( 1905 self._match(TokenType.ALIAS) 1906 and self._match_texts(("STRUCT", "VALUE")) 1907 and self._prev.text 1908 ) 1909 1910 if distinct: 1911 distinct = self.expression( 1912 exp.Distinct, 1913 on=self._parse_value() if self._match(TokenType.ON) else None, 1914 ) 1915 1916 if all_ and distinct: 1917 self.raise_error("Cannot specify both ALL and DISTINCT after SELECT") 1918 1919 limit = self._parse_limit(top=True) 1920 expressions = self._parse_csv(self._parse_expression) 1921 1922 this = self.expression( 1923 exp.Select, 1924 kind=kind, 1925 hint=hint, 1926 distinct=distinct, 1927 expressions=expressions, 1928 limit=limit, 1929 ) 1930 this.comments = comments 1931 1932 into = self._parse_into() 1933 if into: 1934 this.set("into", into) 1935 1936 from_ = self._parse_from() 1937 if from_: 1938 this.set("from", from_) 1939 1940 this = self._parse_query_modifiers(this) 1941 elif (table or nested) and self._match(TokenType.L_PAREN): 1942 if self._match(TokenType.PIVOT): 1943 this = self._parse_simplified_pivot() 1944 elif self._match(TokenType.FROM): 1945 this = exp.select("*").from_( 1946 t.cast(exp.From, 
    def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]:
        """Parse a WITH (CTE) clause into an exp.With node.

        Args:
            skip_with_token: the WITH keyword was already consumed by the caller.
        """
        if not skip_with_token and not self._match(TokenType.WITH):
            return None

        comments = self._prev_comments
        recursive = self._match(TokenType.RECURSIVE)

        expressions = []
        while True:
            expressions.append(self._parse_cte())

            if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH):
                break
            else:
                # Tolerate a redundant WITH after the comma separator.
                self._match(TokenType.WITH)

        return self.expression(
            exp.With, comments=comments, expressions=expressions, recursive=recursive
        )

    def _parse_cte(self) -> exp.CTE:
        """Parse one CTE: <alias> AS ( <statement> ); the alias is mandatory."""
        alias = self._parse_table_alias()
        if not alias or not alias.this:
            self.raise_error("Expected CTE to have alias")

        self._match(TokenType.ALIAS)
        return self.expression(
            exp.CTE, this=self._parse_wrapped(self._parse_statement), alias=alias
        )

    def _parse_table_alias(
        self, alias_tokens: t.Optional[t.Collection[TokenType]] = None
    ) -> t.Optional[exp.TableAlias]:
        """Parse [AS] <alias> [(<columns>)]; None when neither part is present."""
        any_token = self._match(TokenType.ALIAS)
        alias = (
            self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
            or self._parse_string_as_identifier()
        )

        index = self._index
        if self._match(TokenType.L_PAREN):
            columns = self._parse_csv(self._parse_function_parameter)
            # Rewind when the parenthesis didn't actually hold a column list.
            self._match_r_paren() if columns else self._retreat(index)
        else:
            columns = None

        if not alias and not columns:
            return None

        return self.expression(exp.TableAlias, this=alias, columns=columns)

    def _parse_subquery(
        self, this: t.Optional[exp.Expression], parse_alias: bool = True
    ) -> t.Optional[exp.Subquery]:
        """Wrap *this* in an exp.Subquery with optional pivots and alias."""
        if not this:
            return None

        return self.expression(
            exp.Subquery,
            this=this,
            pivots=self._parse_pivots(),
            alias=self._parse_table_alias() if parse_alias else None,
        )

    def _parse_query_modifiers(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        """Attach trailing query modifiers (joins, where, group, limit, ...) to
        *this*, when it is a modifiable node; returns *this* unchanged otherwise."""
        if isinstance(this, self.MODIFIABLES):
            for key, parser in self.QUERY_MODIFIER_PARSERS.items():
                expression = parser(self)

                if expression:
                    if key == "limit":
                        # LIMIT may carry an inline OFFSET; re-home it on the query.
                        offset = expression.args.pop("offset", None)
                        if offset:
                            this.set("offset", exp.Offset(expression=offset))
                    this.set(key, expression)
        return this

    def _parse_hint(self) -> t.Optional[exp.Hint]:
        """Parse an optimizer hint comment block /*+ ... */ after SELECT."""
        if self._match(TokenType.HINT):
            hints = self._parse_csv(self._parse_function)

            if not self._match_pair(TokenType.STAR, TokenType.SLASH):
                self.raise_error("Expected */ after HINT")

            return self.expression(exp.Hint, expressions=hints)

        return None

    def _parse_into(self) -> t.Optional[exp.Into]:
        """Parse SELECT ... INTO [TEMPORARY|UNLOGGED] [TABLE] <table>."""
        if not self._match(TokenType.INTO):
            return None

        temp = self._match(TokenType.TEMPORARY)
        unlogged = self._match_text_seq("UNLOGGED")
        self._match(TokenType.TABLE)

        return self.expression(
            exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged
        )
modifiers: bool = False, skip_from_token: bool = False 2073 ) -> t.Optional[exp.From]: 2074 if not skip_from_token and not self._match(TokenType.FROM): 2075 return None 2076 2077 comments = self._prev_comments 2078 this = self._parse_table() 2079 2080 return self.expression( 2081 exp.From, 2082 comments=comments, 2083 this=self._parse_query_modifiers(this) if modifiers else this, 2084 ) 2085 2086 def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]: 2087 if not self._match(TokenType.MATCH_RECOGNIZE): 2088 return None 2089 2090 self._match_l_paren() 2091 2092 partition = self._parse_partition_by() 2093 order = self._parse_order() 2094 measures = ( 2095 self._parse_csv(self._parse_expression) if self._match_text_seq("MEASURES") else None 2096 ) 2097 2098 if self._match_text_seq("ONE", "ROW", "PER", "MATCH"): 2099 rows = exp.var("ONE ROW PER MATCH") 2100 elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"): 2101 text = "ALL ROWS PER MATCH" 2102 if self._match_text_seq("SHOW", "EMPTY", "MATCHES"): 2103 text += f" SHOW EMPTY MATCHES" 2104 elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"): 2105 text += f" OMIT EMPTY MATCHES" 2106 elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"): 2107 text += f" WITH UNMATCHED ROWS" 2108 rows = exp.var(text) 2109 else: 2110 rows = None 2111 2112 if self._match_text_seq("AFTER", "MATCH", "SKIP"): 2113 text = "AFTER MATCH SKIP" 2114 if self._match_text_seq("PAST", "LAST", "ROW"): 2115 text += f" PAST LAST ROW" 2116 elif self._match_text_seq("TO", "NEXT", "ROW"): 2117 text += f" TO NEXT ROW" 2118 elif self._match_text_seq("TO", "FIRST"): 2119 text += f" TO FIRST {self._advance_any().text}" # type: ignore 2120 elif self._match_text_seq("TO", "LAST"): 2121 text += f" TO LAST {self._advance_any().text}" # type: ignore 2122 after = exp.var(text) 2123 else: 2124 after = None 2125 2126 if self._match_text_seq("PATTERN"): 2127 self._match_l_paren() 2128 2129 if not self._curr: 2130 self.raise_error("Expecting )", 
self._curr) 2131 2132 paren = 1 2133 start = self._curr 2134 2135 while self._curr and paren > 0: 2136 if self._curr.token_type == TokenType.L_PAREN: 2137 paren += 1 2138 if self._curr.token_type == TokenType.R_PAREN: 2139 paren -= 1 2140 2141 end = self._prev 2142 self._advance() 2143 2144 if paren > 0: 2145 self.raise_error("Expecting )", self._curr) 2146 2147 pattern = exp.var(self._find_sql(start, end)) 2148 else: 2149 pattern = None 2150 2151 define = ( 2152 self._parse_csv( 2153 lambda: self.expression( 2154 exp.Alias, 2155 alias=self._parse_id_var(any_token=True), 2156 this=self._match(TokenType.ALIAS) and self._parse_conjunction(), 2157 ) 2158 ) 2159 if self._match_text_seq("DEFINE") 2160 else None 2161 ) 2162 2163 self._match_r_paren() 2164 2165 return self.expression( 2166 exp.MatchRecognize, 2167 partition_by=partition, 2168 order=order, 2169 measures=measures, 2170 rows=rows, 2171 after=after, 2172 pattern=pattern, 2173 define=define, 2174 alias=self._parse_table_alias(), 2175 ) 2176 2177 def _parse_lateral(self) -> t.Optional[exp.Lateral]: 2178 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY) 2179 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY) 2180 2181 if outer_apply or cross_apply: 2182 this = self._parse_select(table=True) 2183 view = None 2184 outer = not cross_apply 2185 elif self._match(TokenType.LATERAL): 2186 this = self._parse_select(table=True) 2187 view = self._match(TokenType.VIEW) 2188 outer = self._match(TokenType.OUTER) 2189 else: 2190 return None 2191 2192 if not this: 2193 this = self._parse_function() or self._parse_id_var(any_token=False) 2194 while self._match(TokenType.DOT): 2195 this = exp.Dot( 2196 this=this, 2197 expression=self._parse_function() or self._parse_id_var(any_token=False), 2198 ) 2199 2200 if view: 2201 table = self._parse_id_var(any_token=False) 2202 columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else [] 2203 table_alias: t.Optional[exp.TableAlias] = 
self.expression( 2204 exp.TableAlias, this=table, columns=columns 2205 ) 2206 elif isinstance(this, exp.Subquery) and this.alias: 2207 # Ensures parity between the Subquery's and the Lateral's "alias" args 2208 table_alias = this.args["alias"].copy() 2209 else: 2210 table_alias = self._parse_table_alias() 2211 2212 return self.expression(exp.Lateral, this=this, view=view, outer=outer, alias=table_alias) 2213 2214 def _parse_join_parts( 2215 self, 2216 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 2217 return ( 2218 self._match_set(self.JOIN_METHODS) and self._prev, 2219 self._match_set(self.JOIN_SIDES) and self._prev, 2220 self._match_set(self.JOIN_KINDS) and self._prev, 2221 ) 2222 2223 def _parse_join(self, skip_join_token: bool = False) -> t.Optional[exp.Join]: 2224 if self._match(TokenType.COMMA): 2225 return self.expression(exp.Join, this=self._parse_table()) 2226 2227 index = self._index 2228 method, side, kind = self._parse_join_parts() 2229 hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None 2230 join = self._match(TokenType.JOIN) 2231 2232 if not skip_join_token and not join: 2233 self._retreat(index) 2234 kind = None 2235 method = None 2236 side = None 2237 2238 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False) 2239 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False) 2240 2241 if not skip_join_token and not join and not outer_apply and not cross_apply: 2242 return None 2243 2244 if outer_apply: 2245 side = Token(TokenType.LEFT, "LEFT") 2246 2247 kwargs: t.Dict[str, t.Any] = {"this": self._parse_table()} 2248 2249 if method: 2250 kwargs["method"] = method.text 2251 if side: 2252 kwargs["side"] = side.text 2253 if kind: 2254 kwargs["kind"] = kind.text 2255 if hint: 2256 kwargs["hint"] = hint 2257 2258 if self._match(TokenType.ON): 2259 kwargs["on"] = self._parse_conjunction() 2260 elif self._match(TokenType.USING): 2261 kwargs["using"] = self._parse_wrapped_id_vars() 2262 
2263 return self.expression(exp.Join, **kwargs) 2264 2265 def _parse_index( 2266 self, 2267 index: t.Optional[exp.Expression] = None, 2268 ) -> t.Optional[exp.Index]: 2269 if index: 2270 unique = None 2271 primary = None 2272 amp = None 2273 2274 self._match(TokenType.ON) 2275 self._match(TokenType.TABLE) # hive 2276 table = self._parse_table_parts(schema=True) 2277 else: 2278 unique = self._match(TokenType.UNIQUE) 2279 primary = self._match_text_seq("PRIMARY") 2280 amp = self._match_text_seq("AMP") 2281 2282 if not self._match(TokenType.INDEX): 2283 return None 2284 2285 index = self._parse_id_var() 2286 table = None 2287 2288 using = self._parse_field() if self._match(TokenType.USING) else None 2289 2290 if self._match(TokenType.L_PAREN, advance=False): 2291 columns = self._parse_wrapped_csv(self._parse_ordered) 2292 else: 2293 columns = None 2294 2295 return self.expression( 2296 exp.Index, 2297 this=index, 2298 table=table, 2299 using=using, 2300 columns=columns, 2301 unique=unique, 2302 primary=primary, 2303 amp=amp, 2304 partition_by=self._parse_partition_by(), 2305 ) 2306 2307 def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]: 2308 hints: t.List[exp.Expression] = [] 2309 if self._match_pair(TokenType.WITH, TokenType.L_PAREN): 2310 # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16 2311 hints.append( 2312 self.expression( 2313 exp.WithTableHint, 2314 expressions=self._parse_csv( 2315 lambda: self._parse_function() or self._parse_var(any_token=True) 2316 ), 2317 ) 2318 ) 2319 self._match_r_paren() 2320 else: 2321 # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html 2322 while self._match_set(self.TABLE_INDEX_HINT_TOKENS): 2323 hint = exp.IndexTableHint(this=self._prev.text.upper()) 2324 2325 self._match_texts({"INDEX", "KEY"}) 2326 if self._match(TokenType.FOR): 2327 hint.set("target", self._advance_any() and self._prev.text.upper()) 2328 2329 hint.set("expressions", 
self._parse_wrapped_id_vars()) 2330 hints.append(hint) 2331 2332 return hints or None 2333 2334 def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]: 2335 return ( 2336 (not schema and self._parse_function(optional_parens=False)) 2337 or self._parse_id_var(any_token=False) 2338 or self._parse_string_as_identifier() 2339 or self._parse_placeholder() 2340 ) 2341 2342 def _parse_table_parts(self, schema: bool = False) -> exp.Table: 2343 catalog = None 2344 db = None 2345 table = self._parse_table_part(schema=schema) 2346 2347 while self._match(TokenType.DOT): 2348 if catalog: 2349 # This allows nesting the table in arbitrarily many dot expressions if needed 2350 table = self.expression( 2351 exp.Dot, this=table, expression=self._parse_table_part(schema=schema) 2352 ) 2353 else: 2354 catalog = db 2355 db = table 2356 table = self._parse_table_part(schema=schema) 2357 2358 if not table: 2359 self.raise_error(f"Expected table name but got {self._curr}") 2360 2361 return self.expression( 2362 exp.Table, this=table, db=db, catalog=catalog, pivots=self._parse_pivots() 2363 ) 2364 2365 def _parse_table( 2366 self, schema: bool = False, alias_tokens: t.Optional[t.Collection[TokenType]] = None 2367 ) -> t.Optional[exp.Expression]: 2368 lateral = self._parse_lateral() 2369 if lateral: 2370 return lateral 2371 2372 unnest = self._parse_unnest() 2373 if unnest: 2374 return unnest 2375 2376 values = self._parse_derived_table_values() 2377 if values: 2378 return values 2379 2380 subquery = self._parse_select(table=True) 2381 if subquery: 2382 if not subquery.args.get("pivots"): 2383 subquery.set("pivots", self._parse_pivots()) 2384 return subquery 2385 2386 this: exp.Expression = self._parse_table_parts(schema=schema) 2387 2388 if schema: 2389 return self._parse_schema(this=this) 2390 2391 if self.ALIAS_POST_TABLESAMPLE: 2392 table_sample = self._parse_table_sample() 2393 2394 alias = self._parse_table_alias(alias_tokens=alias_tokens or 
self.TABLE_ALIAS_TOKENS) 2395 if alias: 2396 this.set("alias", alias) 2397 2398 if not this.args.get("pivots"): 2399 this.set("pivots", self._parse_pivots()) 2400 2401 this.set("hints", self._parse_table_hints()) 2402 2403 if not self.ALIAS_POST_TABLESAMPLE: 2404 table_sample = self._parse_table_sample() 2405 2406 if table_sample: 2407 table_sample.set("this", this) 2408 this = table_sample 2409 2410 return this 2411 2412 def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]: 2413 if not self._match(TokenType.UNNEST): 2414 return None 2415 2416 expressions = self._parse_wrapped_csv(self._parse_type) 2417 ordinality = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 2418 2419 alias = self._parse_table_alias() if with_alias else None 2420 2421 if alias and self.UNNEST_COLUMN_ONLY: 2422 if alias.args.get("columns"): 2423 self.raise_error("Unexpected extra column alias in unnest.") 2424 2425 alias.set("columns", [alias.this]) 2426 alias.set("this", None) 2427 2428 offset = None 2429 if self._match_pair(TokenType.WITH, TokenType.OFFSET): 2430 self._match(TokenType.ALIAS) 2431 offset = self._parse_id_var() or exp.to_identifier("offset") 2432 2433 return self.expression( 2434 exp.Unnest, expressions=expressions, ordinality=ordinality, alias=alias, offset=offset 2435 ) 2436 2437 def _parse_derived_table_values(self) -> t.Optional[exp.Values]: 2438 is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES) 2439 if not is_derived and not self._match(TokenType.VALUES): 2440 return None 2441 2442 expressions = self._parse_csv(self._parse_value) 2443 alias = self._parse_table_alias() 2444 2445 if is_derived: 2446 self._match_r_paren() 2447 2448 return self.expression( 2449 exp.Values, expressions=expressions, alias=alias or self._parse_table_alias() 2450 ) 2451 2452 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 2453 if not self._match(TokenType.TABLE_SAMPLE) and not ( 2454 as_modifier and 
self._match_text_seq("USING", "SAMPLE") 2455 ): 2456 return None 2457 2458 bucket_numerator = None 2459 bucket_denominator = None 2460 bucket_field = None 2461 percent = None 2462 rows = None 2463 size = None 2464 seed = None 2465 2466 kind = ( 2467 self._prev.text if self._prev.token_type == TokenType.TABLE_SAMPLE else "USING SAMPLE" 2468 ) 2469 method = self._parse_var(tokens=(TokenType.ROW,)) 2470 2471 self._match(TokenType.L_PAREN) 2472 2473 num = self._parse_number() 2474 2475 if self._match_text_seq("BUCKET"): 2476 bucket_numerator = self._parse_number() 2477 self._match_text_seq("OUT", "OF") 2478 bucket_denominator = bucket_denominator = self._parse_number() 2479 self._match(TokenType.ON) 2480 bucket_field = self._parse_field() 2481 elif self._match_set((TokenType.PERCENT, TokenType.MOD)): 2482 percent = num 2483 elif self._match(TokenType.ROWS): 2484 rows = num 2485 else: 2486 size = num 2487 2488 self._match(TokenType.R_PAREN) 2489 2490 if self._match(TokenType.L_PAREN): 2491 method = self._parse_var() 2492 seed = self._match(TokenType.COMMA) and self._parse_number() 2493 self._match_r_paren() 2494 elif self._match_texts(("SEED", "REPEATABLE")): 2495 seed = self._parse_wrapped(self._parse_number) 2496 2497 return self.expression( 2498 exp.TableSample, 2499 method=method, 2500 bucket_numerator=bucket_numerator, 2501 bucket_denominator=bucket_denominator, 2502 bucket_field=bucket_field, 2503 percent=percent, 2504 rows=rows, 2505 size=size, 2506 seed=seed, 2507 kind=kind, 2508 ) 2509 2510 def _parse_pivots(self) -> t.List[t.Optional[exp.Expression]]: 2511 return list(iter(self._parse_pivot, None)) 2512 2513 # https://duckdb.org/docs/sql/statements/pivot 2514 def _parse_simplified_pivot(self) -> exp.Pivot: 2515 def _parse_on() -> t.Optional[exp.Expression]: 2516 this = self._parse_bitwise() 2517 return self._parse_in(this) if self._match(TokenType.IN) else this 2518 2519 this = self._parse_table() 2520 expressions = self._match(TokenType.ON) and 
self._parse_csv(_parse_on) 2521 using = self._match(TokenType.USING) and self._parse_csv( 2522 lambda: self._parse_alias(self._parse_function()) 2523 ) 2524 group = self._parse_group() 2525 return self.expression( 2526 exp.Pivot, this=this, expressions=expressions, using=using, group=group 2527 ) 2528 2529 def _parse_pivot(self) -> t.Optional[exp.Pivot]: 2530 index = self._index 2531 2532 if self._match(TokenType.PIVOT): 2533 unpivot = False 2534 elif self._match(TokenType.UNPIVOT): 2535 unpivot = True 2536 else: 2537 return None 2538 2539 expressions = [] 2540 field = None 2541 2542 if not self._match(TokenType.L_PAREN): 2543 self._retreat(index) 2544 return None 2545 2546 if unpivot: 2547 expressions = self._parse_csv(self._parse_column) 2548 else: 2549 expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function())) 2550 2551 if not expressions: 2552 self.raise_error("Failed to parse PIVOT's aggregation list") 2553 2554 if not self._match(TokenType.FOR): 2555 self.raise_error("Expecting FOR") 2556 2557 value = self._parse_column() 2558 2559 if not self._match(TokenType.IN): 2560 self.raise_error("Expecting IN") 2561 2562 field = self._parse_in(value, alias=True) 2563 2564 self._match_r_paren() 2565 2566 pivot = self.expression(exp.Pivot, expressions=expressions, field=field, unpivot=unpivot) 2567 2568 if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False): 2569 pivot.set("alias", self._parse_table_alias()) 2570 2571 if not unpivot: 2572 names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions)) 2573 2574 columns: t.List[exp.Expression] = [] 2575 for fld in pivot.args["field"].expressions: 2576 field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name 2577 for name in names: 2578 if self.PREFIXED_PIVOT_COLUMNS: 2579 name = f"{name}_{field_name}" if name else field_name 2580 else: 2581 name = f"{field_name}_{name}" if name else field_name 2582 2583 columns.append(exp.to_identifier(name)) 
            pivot.set("columns", columns)

        return pivot

    def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]:
        """Return the alias of each pivot aggregation; dialects may override this."""
        return [agg.alias for agg in aggregations]

    def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]:
        """Parse an optional WHERE clause into exp.Where (None if absent)."""
        if not skip_where_token and not self._match(TokenType.WHERE):
            return None

        return self.expression(
            exp.Where, comments=self._prev_comments, this=self._parse_conjunction()
        )

    def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]:
        """Parse GROUP BY with expressions, GROUPING SETS, [WITH] ROLLUP/CUBE and
        WITH TOTALS, accumulating each category until none matches."""
        if not skip_group_by_token and not self._match(TokenType.GROUP_BY):
            return None

        elements = defaultdict(list)

        while True:
            expressions = self._parse_csv(self._parse_conjunction)
            if expressions:
                elements["expressions"].extend(expressions)

            grouping_sets = self._parse_grouping_sets()
            if grouping_sets:
                elements["grouping_sets"].extend(grouping_sets)

            rollup = None
            cube = None
            totals = None

            # `WITH ROLLUP` / `WITH CUBE` have no column list; bare ROLLUP(...) does.
            with_ = self._match(TokenType.WITH)
            if self._match(TokenType.ROLLUP):
                rollup = with_ or self._parse_wrapped_csv(self._parse_column)
                elements["rollup"].extend(ensure_list(rollup))

            if self._match(TokenType.CUBE):
                cube = with_ or self._parse_wrapped_csv(self._parse_column)
                elements["cube"].extend(ensure_list(cube))

            if self._match_text_seq("TOTALS"):
                totals = True
                elements["totals"] = True  # type: ignore

            if not (grouping_sets or rollup or cube or totals):
                break

        return self.expression(exp.Group, **elements)  # type: ignore

    def _parse_grouping_sets(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]:
        """Parse GROUPING SETS ( ... ) into a list of grouping sets, or None."""
        if not self._match(TokenType.GROUPING_SETS):
            return None

        return self._parse_wrapped_csv(self._parse_grouping_set)

    def _parse_grouping_set(self) -> t.Optional[exp.Expression]:
        """Parse one grouping set: a parenthesized column tuple or a single column."""
        if self._match(TokenType.L_PAREN):
            grouping_set = self._parse_csv(self._parse_column)
            self._match_r_paren()
            return self.expression(exp.Tuple, expressions=grouping_set)

        return self._parse_column()

    def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]:
        """Parse an optional HAVING clause into exp.Having (None if absent)."""
        if not skip_having_token and not self._match(TokenType.HAVING):
            return None
        return self.expression(exp.Having, this=self._parse_conjunction())

    def _parse_qualify(self) -> t.Optional[exp.Qualify]:
        """Parse an optional QUALIFY clause into exp.Qualify (None if absent)."""
        if not self._match(TokenType.QUALIFY):
            return None
        return self.expression(exp.Qualify, this=self._parse_conjunction())

    def _parse_order(
        self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse an optional ORDER BY; returns `this` unchanged when absent."""
        if not skip_order_token and not self._match(TokenType.ORDER_BY):
            return this

        return self.expression(
            exp.Order, this=this, expressions=self._parse_csv(self._parse_ordered)
        )

    def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]:
        """Parse a sort-like clause introduced by `token` (e.g. SORT BY) into `exp_class`."""
        if not self._match(token):
            return None
        return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered))

    def _parse_ordered(self) -> exp.Ordered:
        """Parse one ORDER BY term with ASC/DESC and NULLS FIRST/LAST, applying the
        dialect's NULL_ORDERING default when the null order is not explicit."""
        this = self._parse_conjunction()
        self._match(TokenType.ASC)

        is_desc = self._match(TokenType.DESC)
        is_nulls_first = self._match_text_seq("NULLS", "FIRST")
        is_nulls_last = self._match_text_seq("NULLS", "LAST")
        desc = is_desc or False
        asc = not desc
        nulls_first = is_nulls_first or False
        explicitly_null_ordered = is_nulls_first or is_nulls_last

        if (
            not explicitly_null_ordered
            and (
                (asc and self.NULL_ORDERING == "nulls_are_small")
                or (desc and self.NULL_ORDERING != "nulls_are_small")
            )
            and self.NULL_ORDERING != "nulls_are_last"
        ):
            nulls_first = True

        return self.expression(exp.Ordered, this=this, desc=desc, nulls_first=nulls_first)

    def _parse_limit(
        self, this: t.Optional[exp.Expression] = None, top: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse LIMIT [offset,] n (or TOP n when `top`), or a FETCH clause.

        Returns exp.Limit / exp.Fetch, or `this` unchanged when neither is present.
        """
        if self._match(TokenType.TOP if top else TokenType.LIMIT):
            limit_paren = self._match(TokenType.L_PAREN)
            expression = self._parse_number() if top else self._parse_term()

            if self._match(TokenType.COMMA):
                # MySQL-style `LIMIT offset, count`.
                offset = expression
                expression = self._parse_term()
            else:
                offset = None

            limit_exp = self.expression(exp.Limit, this=this, expression=expression, offset=offset)

            if limit_paren:
                self._match_r_paren()

            return limit_exp

        if self._match(TokenType.FETCH):
            direction = self._match_set((TokenType.FIRST, TokenType.NEXT))
            direction = self._prev.text if direction else "FIRST"

            count = self._parse_number()
            percent = self._match(TokenType.PERCENT)

            self._match_set((TokenType.ROW, TokenType.ROWS))

            only = self._match_text_seq("ONLY")
            with_ties = self._match_text_seq("WITH", "TIES")

            if only and with_ties:
                self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause")

            return self.expression(
                exp.Fetch,
                direction=direction,
                count=count,
                percent=percent,
                with_ties=with_ties,
            )

        return this

    def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        """Parse OFFSET n [ROW|ROWS]; returns `this` unchanged when absent."""
        if not self._match(TokenType.OFFSET):
            return this

        count = self._parse_number()
        self._match_set((TokenType.ROW, TokenType.ROWS))
        return self.expression(exp.Offset, this=this, expression=count)

    def _parse_locks(self) -> t.List[exp.Lock]:
        """Parse trailing locking clauses (FOR UPDATE / FOR SHARE / LOCK IN SHARE MODE)."""
        locks = []
        while True:
            if self._match_text_seq("FOR", "UPDATE"):
                update = True
            elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq(
                "LOCK", "IN", "SHARE", "MODE"
            ):
                update = False
            else:
                break

            expressions = None
            if self._match_text_seq("OF"):
                expressions = self._parse_csv(lambda: self._parse_table(schema=True))

            # wait is True for NOWAIT, False for SKIP LOCKED, or a WAIT <n> expression.
            wait: t.Optional[bool | exp.Expression] = None
            if self._match_text_seq("NOWAIT"):
                wait = True
            elif self._match_text_seq("WAIT"):
                wait = self._parse_primary()
            elif self._match_text_seq("SKIP", "LOCKED"):
                wait = False

            locks.append(
                self.expression(exp.Lock, update=update, expressions=expressions, wait=wait)
            )

        return locks

    def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse UNION/EXCEPT/INTERSECT chains (right-recursive); returns `this`
        unchanged when no set operator follows."""
        if not self._match_set(self.SET_OPERATIONS):
            return this

        token_type = self._prev.token_type

        if token_type == TokenType.UNION:
            expression = exp.Union
        elif token_type == TokenType.EXCEPT:
            expression = exp.Except
        else:
            expression = exp.Intersect

        return self.expression(
            expression,
            this=this,
            # DISTINCT is the default unless ALL is specified.
            distinct=self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL),
            expression=self._parse_set_operations(self._parse_select(nested=True)),
        )

    def _parse_expression(self) -> t.Optional[exp.Expression]:
        """Parse a full scalar expression, including a trailing alias."""
        return self._parse_alias(self._parse_conjunction())

    def _parse_conjunction(self) -> t.Optional[exp.Expression]:
        """Parse AND/OR-connected expressions (precedence via self.CONJUNCTION)."""
        return self._parse_tokens(self._parse_equality, self.CONJUNCTION)

    def _parse_equality(self) -> t.Optional[exp.Expression]:
        """Parse equality-level operators (self.EQUALITY)."""
        return self._parse_tokens(self._parse_comparison, self.EQUALITY)

    def _parse_comparison(self) -> t.Optional[exp.Expression]:
        """Parse comparison-level operators (self.COMPARISON)."""
        return self._parse_tokens(self._parse_range, self.COMPARISON)

    def _parse_range(self) -> t.Optional[exp.Expression]:
        """Parse range-level predicates: [NOT] BETWEEN/IN/LIKE etc. (via
        RANGE_PARSERS), plus ISNULL/NOTNULL and IS."""
        this = self._parse_bitwise()
        negate = self._match(TokenType.NOT)

        if self._match_set(self.RANGE_PARSERS):
            expression = self.RANGE_PARSERS[self._prev.token_type](self, this)
            if not expression:
                return this

            this = expression
        elif self._match(TokenType.ISNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())

        # Postgres supports ISNULL and NOTNULL for conditions.
        # https://blog.andreiavram.ro/postgresql-null-composite-type/
        if self._match(TokenType.NOTNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())
            this = self.expression(exp.Not, this=this)

        if negate:
            this = self.expression(exp.Not, this=this)

        if self._match(TokenType.IS):
            this = self._parse_is(this)

        return this

    def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse the tail of an IS predicate: [NOT] DISTINCT FROM, NULL, or a boolean.

        Rewinds and returns None if nothing valid follows IS.
        """
        index = self._index - 1
        negate = self._match(TokenType.NOT)

        if self._match_text_seq("DISTINCT", "FROM"):
            klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ
            return self.expression(klass, this=this, expression=self._parse_expression())

        expression = self._parse_null() or self._parse_boolean()
        if not expression:
            self._retreat(index)
            return None

        this = self.expression(exp.Is, this=this, expression=expression)
        return self.expression(exp.Not, this=this) if negate else this

    def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In:
        """Parse the right-hand side of IN: UNNEST(...), a parenthesized list or
        subquery, or a bare field."""
        unnest = self._parse_unnest(with_alias=False)
        if unnest:
            this = self.expression(exp.In, this=this, unnest=unnest)
        elif self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias))

            if len(expressions) == 1 and isinstance(expressions[0], exp.Subqueryable):
                this = self.expression(exp.In, this=this, query=expressions[0])
            else:
                this = self.expression(exp.In, this=this, expressions=expressions)

            self._match_r_paren(this)
        else:
            this = self.expression(exp.In, this=this, field=self._parse_field())

        return this

    def _parse_between(self, this: exp.Expression) -> exp.Between:
        """Parse the `low AND high` tail of a BETWEEN predicate."""
        low = self._parse_bitwise()
        self._match(TokenType.AND)
        high = self._parse_bitwise()
        return self.expression(exp.Between, this=this, low=low, high=high)

    def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Wrap `this` in exp.Escape if an ESCAPE '<char>' clause follows."""
        if not self._match(TokenType.ESCAPE):
            return this
        return self.expression(exp.Escape, this=this, expression=self._parse_string())

    def _parse_interval(self) -> t.Optional[exp.Interval]:
        """Parse an INTERVAL literal into exp.Interval (None if INTERVAL absent)."""
        if not self._match(TokenType.INTERVAL):
            return None

        if self._match(TokenType.STRING, advance=False):
            this = self._parse_primary()
        else:
            this = self._parse_term()

        unit = self._parse_function() or self._parse_var()

        # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse
        # each INTERVAL expression into this canonical form so it's easy to transpile
        if this and this.is_number:
            this = exp.Literal.string(this.name)
        elif this and this.is_string:
            parts = this.name.split()

            if len(parts) == 2:
                if unit:
                    # this is not actually a unit, it's something else
                    unit = None
                    self._retreat(self._index - 1)
                else:
                    this = exp.Literal.string(parts[0])
                    unit = self.expression(exp.Var, this=parts[1])

        return self.expression(exp.Interval, this=this, unit=unit)

    def _parse_bitwise(self) -> t.Optional[exp.Expression]:
        """Parse bitwise operators, including `<<` and `>>` spelled as token pairs."""
        this = self._parse_term()

        while True:
            if self._match_set(self.BITWISE):
                this = self.expression(
                    self.BITWISE[self._prev.token_type], this=this, expression=self._parse_term()
                )
            elif self._match_pair(TokenType.LT, TokenType.LT):
                this = self.expression(
                    exp.BitwiseLeftShift, this=this, expression=self._parse_term()
                )
            elif self._match_pair(TokenType.GT, TokenType.GT):
                this = self.expression(
                    exp.BitwiseRightShift,
 this=this, expression=self._parse_term()
                )
            else:
                break

        return this

    def _parse_term(self) -> t.Optional[exp.Expression]:
        """Parse additive-level operators (self.TERM)."""
        return self._parse_tokens(self._parse_factor, self.TERM)

    def _parse_factor(self) -> t.Optional[exp.Expression]:
        """Parse multiplicative-level operators (self.FACTOR)."""
        return self._parse_tokens(self._parse_unary, self.FACTOR)

    def _parse_unary(self) -> t.Optional[exp.Expression]:
        """Parse a unary operator (via UNARY_PARSERS) or fall through to a typed
        operand with an optional AT TIME ZONE suffix."""
        if self._match_set(self.UNARY_PARSERS):
            return self.UNARY_PARSERS[self._prev.token_type](self)
        return self._parse_at_time_zone(self._parse_type())

    def _parse_type(self) -> t.Optional[exp.Expression]:
        """Parse either an INTERVAL, a `TYPE literal` cast shorthand, or a column.

        When a data type is followed by a literal, dialect TYPE_LITERAL_PARSERS
        may build a custom node; otherwise an exp.Cast is produced. Rewinds if the
        parse turns out not to be a type application.
        """
        interval = self._parse_interval()
        if interval:
            return interval

        index = self._index
        data_type = self._parse_types(check_func=True)
        this = self._parse_column()

        if data_type:
            if isinstance(this, exp.Literal):
                parser = self.TYPE_LITERAL_PARSERS.get(data_type.this)
                if parser:
                    return parser(self, this, data_type)
                return self.expression(exp.Cast, this=this, to=data_type)
            if not data_type.expressions:
                self._retreat(index)
                return self._parse_column()
            return self._parse_column_ops(data_type)

        return this

    def _parse_type_size(self) -> t.Optional[exp.DataTypeSize]:
        """Parse a type size argument, e.g. the `10` (plus optional modifier) in VARCHAR(10)."""
        this = self._parse_type()
        if not this:
            return None

        return self.expression(
            exp.DataTypeSize, this=this, expression=self._parse_var(any_token=True)
        )

    def _parse_types(
        self, check_func: bool = False, schema: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse a data type into exp.DataType.

        Handles parenthesized sizes, STRUCT/ENUM/nested element types, `[]` array
        suffixes, `<...>` nested type args, TIMESTAMP WITH [LOCAL] TIME ZONE and
        INTERVAL units. Rewinds and returns None when the tokens turn out not to
        form a type (e.g. a function call when `check_func` is set).
        """
        index = self._index

        # Teradata SYSUDTLIB.<type> prefix.
        prefix = self._match_text_seq("SYSUDTLIB", ".")

        if not self._match_set(self.TYPE_TOKENS):
            return None

        type_token = self._prev.token_type

        if type_token == TokenType.PSEUDO_TYPE:
            return self.expression(exp.PseudoType, this=self._prev.text)

        nested = type_token in self.NESTED_TYPE_TOKENS
        is_struct = type_token == TokenType.STRUCT
        expressions = None
        maybe_func = False

        if self._match(TokenType.L_PAREN):
            if is_struct:
                expressions = self._parse_csv(self._parse_struct_types)
            elif nested:
                expressions = self._parse_csv(
                    lambda: self._parse_types(check_func=check_func, schema=schema)
                )
            elif type_token in self.ENUM_TYPE_TOKENS:
                expressions = self._parse_csv(self._parse_primary)
            else:
                expressions = self._parse_csv(self._parse_type_size)

            if not expressions or not self._match(TokenType.R_PAREN):
                self._retreat(index)
                return None

            maybe_func = True

        if self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET):
            # `TYPE[]` (possibly repeated) builds nested ARRAY types.
            this = exp.DataType(
                this=exp.DataType.Type.ARRAY,
                expressions=[exp.DataType.build(type_token.value, expressions=expressions)],
                nested=True,
            )

            while self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET):
                this = exp.DataType(this=exp.DataType.Type.ARRAY, expressions=[this], nested=True)

            return this

        if self._match(TokenType.L_BRACKET):
            # A lone `[` means this was a bracket expression, not a type.
            self._retreat(index)
            return None

        values: t.Optional[t.List[t.Optional[exp.Expression]]] = None
        if nested and self._match(TokenType.LT):
            if is_struct:
                expressions = self._parse_csv(self._parse_struct_types)
            else:
                expressions = self._parse_csv(
                    lambda: self._parse_types(check_func=check_func, schema=schema)
                )

            if not self._match(TokenType.GT):
                self.raise_error("Expecting >")

            if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)):
                values = self._parse_csv(self._parse_conjunction)
                self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN))

        value: t.Optional[exp.Expression] = None
        if type_token in self.TIMESTAMPS:
            if self._match_text_seq("WITH", "TIME", "ZONE"):
                maybe_func = False
                value = exp.DataType(this=exp.DataType.Type.TIMESTAMPTZ,
 expressions=expressions)
            elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"):
                maybe_func = False
                value = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions)
            elif self._match_text_seq("WITHOUT", "TIME", "ZONE"):
                maybe_func = False
        elif type_token == TokenType.INTERVAL:
            unit = self._parse_var()

            if not unit:
                value = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL)
            else:
                value = self.expression(exp.Interval, unit=unit)

        if maybe_func and check_func:
            # A type name followed by a string literal (e.g. DATE '2020-01-01') is
            # a typed literal, not a type - peek for a string and bail out if found.
            index2 = self._index
            peek = self._parse_string()

            if not peek:
                self._retreat(index)
                return None

            self._retreat(index2)

        if value:
            return value

        return exp.DataType(
            this=exp.DataType.Type[type_token.value.upper()],
            expressions=expressions,
            nested=nested,
            values=values,
            prefix=prefix,
        )

    def _parse_struct_types(self) -> t.Optional[exp.Expression]:
        """Parse one STRUCT member: `name [:] type`, returned as a column def."""
        this = self._parse_type() or self._parse_id_var()
        self._match(TokenType.COLON)
        return self._parse_column_def(this)

    def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Wrap `this` in exp.AtTimeZone if AT TIME ZONE follows."""
        if not self._match_text_seq("AT", "TIME", "ZONE"):
            return this
        return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary())

    def _parse_column(self) -> t.Optional[exp.Expression]:
        """Parse a column reference (a field plus dots, casts, brackets, etc.)."""
        this = self._parse_field()
        if isinstance(this, exp.Identifier):
            this = self.expression(exp.Column, this=this)
        elif not this:
            return self._parse_bracket(this)
        return self._parse_column_ops(this)

    def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Apply trailing column operators (::, ->, dots, brackets) to `this`."""
        this = self._parse_bracket(this)

        while self._match_set(self.COLUMN_OPERATORS):
            op_token = self._prev.token_type
            op = self.COLUMN_OPERATORS.get(op_token)

            if op_token ==
TokenType.DCOLON:
                # `expr::type` cast - the right-hand side must be a type.
                field = self._parse_types()
                if not field:
                    self.raise_error("Expected type")
            elif op and self._curr:
                self._advance()
                value = self._prev.text
                field = (
                    exp.Literal.number(value)
                    if self._prev.token_type == TokenType.NUMBER
                    else exp.Literal.string(value)
                )
            else:
                field = self._parse_field(anonymous_func=True, any_token=True)

            if isinstance(field, exp.Func):
                # bigquery allows function calls like x.y.count(...)
                # SAFE.SUBSTR(...)
                # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules
                this = self._replace_columns_with_dots(this)

            if op:
                this = op(self, this, field)
            elif isinstance(this, exp.Column) and not this.args.get("catalog"):
                # Fold one more dotted level into the column's table/db/catalog.
                this = self.expression(
                    exp.Column,
                    this=field,
                    table=this.this,
                    db=this.args.get("table"),
                    catalog=this.args.get("db"),
                )
            else:
                this = self.expression(exp.Dot, this=this, expression=field)
            this = self._parse_bracket(this)
        return this

    def _parse_primary(self) -> t.Optional[exp.Expression]:
        """Parse a primary expression: a literal (with implicit string concatenation),
        a `.N` decimal, or a parenthesized expression/subquery/tuple."""
        if self._match_set(self.PRIMARY_PARSERS):
            token_type = self._prev.token_type
            primary = self.PRIMARY_PARSERS[token_type](self, self._prev)

            if token_type == TokenType.STRING:
                # Adjacent string literals concatenate (SQL standard behavior).
                expressions = [primary]
                while self._match(TokenType.STRING):
                    expressions.append(exp.Literal.string(self._prev.text))

                if len(expressions) > 1:
                    return self.expression(exp.Concat, expressions=expressions)

            return primary

        if self._match_pair(TokenType.DOT, TokenType.NUMBER):
            return exp.Literal.number(f"0.{self._prev.text}")

        if self._match(TokenType.L_PAREN):
            comments = self._prev_comments
            query = self._parse_select()

            if query:
                expressions = [query]
            else:
                expressions = self._parse_csv(self._parse_expression)

            this =
 self._parse_query_modifiers(seq_get(expressions, 0))

            if isinstance(this, exp.Subqueryable):
                this = self._parse_set_operations(
                    self._parse_subquery(this=this, parse_alias=False)
                )
            elif len(expressions) > 1:
                this = self.expression(exp.Tuple, expressions=expressions)
            else:
                this = self.expression(exp.Paren, this=self._parse_set_operations(this))

            if this:
                this.add_comments(comments)

            self._match_r_paren(expression=this)
            return this

        return None

    def _parse_field(
        self,
        any_token: bool = False,
        tokens: t.Optional[t.Collection[TokenType]] = None,
        anonymous_func: bool = False,
    ) -> t.Optional[exp.Expression]:
        """Parse a field: a primary, a function call, or an identifier."""
        return (
            self._parse_primary()
            or self._parse_function(anonymous=anonymous_func)
            or self._parse_id_var(any_token=any_token, tokens=tokens)
        )

    def _parse_function(
        self,
        functions: t.Optional[t.Dict[str, t.Callable]] = None,
        anonymous: bool = False,
        optional_parens: bool = True,
    ) -> t.Optional[exp.Expression]:
        """Parse a function call.

        Args:
            functions: name -> builder overrides; defaults to self.FUNCTIONS.
            anonymous: build an exp.Anonymous node instead of a typed function.
            optional_parens: allow paren-less functions (e.g. CURRENT_DATE).

        Returns:
            The function expression (possibly wrapped in a window), or None.
        """
        if not self._curr:
            return None

        token_type = self._curr.token_type

        if optional_parens and self._match_set(self.NO_PAREN_FUNCTION_PARSERS):
            return self.NO_PAREN_FUNCTION_PARSERS[token_type](self)

        if not self._next or self._next.token_type != TokenType.L_PAREN:
            if optional_parens and token_type in self.NO_PAREN_FUNCTIONS:
                self._advance()
                return self.expression(self.NO_PAREN_FUNCTIONS[token_type])

            return None

        if token_type not in self.FUNC_TOKENS:
            return None

        this = self._curr.text
        upper = this.upper()
        self._advance(2)

        parser = self.FUNCTION_PARSERS.get(upper)

        if parser and not anonymous:
            this = parser(self)
        else:
            subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type)

            if subquery_predicate and self._curr.token_type in (TokenType.SELECT,
TokenType.WITH):
                this = self.expression(subquery_predicate, this=self._parse_select())
                self._match_r_paren()
                return this

            if functions is None:
                functions = self.FUNCTIONS

            function = functions.get(upper)

            alias = upper in self.FUNCTIONS_WITH_ALIASED_ARGS
            args = self._parse_csv(lambda: self._parse_lambda(alias=alias))

            if function and not anonymous:
                # validate_expression reports arg-count/shape errors per error_level.
                this = self.validate_expression(function(args), args)
            else:
                this = self.expression(exp.Anonymous, this=this, expressions=args)

        self._match_r_paren(this)
        return self._parse_window(this)

    def _parse_function_parameter(self) -> t.Optional[exp.Expression]:
        """Parse one parameter of a function definition (name plus optional type)."""
        return self._parse_column_def(self._parse_id_var())

    def _parse_user_defined_function(
        self, kind: t.Optional[TokenType] = None
    ) -> t.Optional[exp.Expression]:
        """Parse a (possibly dotted) UDF name, with its parameter list if present."""
        this = self._parse_id_var()

        while self._match(TokenType.DOT):
            this = self.expression(exp.Dot, this=this, expression=self._parse_id_var())

        if not self._match(TokenType.L_PAREN):
            return this

        expressions = self._parse_csv(self._parse_function_parameter)
        self._match_r_paren()
        return self.expression(
            exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True
        )

    def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier:
        """Parse a charset introducer (e.g. _utf8'...'); fall back to an identifier."""
        literal = self._parse_primary()
        if literal:
            return self.expression(exp.Introducer, this=token.text, expression=literal)

        return self.expression(exp.Identifier, this=token.text)

    def _parse_session_parameter(self) -> exp.SessionParameter:
        """Parse a session parameter reference, optionally qualified as kind.name."""
        kind = None
        this = self._parse_id_var() or self._parse_primary()

        if this and self._match(TokenType.DOT):
            kind = this.name
            this = self._parse_var() or self._parse_primary()

        return self.expression(exp.SessionParameter, this=this, kind=kind)

    def _parse_lambda(self,
 alias: bool = False) -> t.Optional[exp.Expression]:
        """Parse a lambda `(args) -> body` if present, otherwise a function argument
        (DISTINCT list, select, or expression) with optional ORDER BY/LIMIT tail.

        Args:
            alias: allow aliases on the parsed argument expressions.
        """
        index = self._index

        if self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(self._parse_id_var)

            if not self._match(TokenType.R_PAREN):
                self._retreat(index)
        else:
            expressions = [self._parse_id_var()]

        if self._match_set(self.LAMBDAS):
            return self.LAMBDAS[self._prev.token_type](self, expressions)

        # Not a lambda - rewind and parse a regular argument instead.
        self._retreat(index)

        this: t.Optional[exp.Expression]

        if self._match(TokenType.DISTINCT):
            this = self.expression(
                exp.Distinct, expressions=self._parse_csv(self._parse_conjunction)
            )
        else:
            this = self._parse_select_or_expression(alias=alias)

        if isinstance(this, exp.EQ):
            # Treat the LHS of `name = value` args as a plain var, not a column.
            left = this.this
            if isinstance(left, exp.Column):
                left.replace(exp.var(left.text("this")))

        return self._parse_limit(self._parse_order(self._parse_respect_or_ignore_nulls(this)))

    def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        """Parse a parenthesized schema (column defs/constraints) attached to `this`.

        First speculatively tries to parse a nested SELECT; if that succeeds the
        parenthesized text was a query, not a schema, and `this` is returned as-is.
        """
        index = self._index

        if not self.errors:
            try:
                if self._parse_select(nested=True):
                    return this
            except ParseError:
                pass
            finally:
                self.errors.clear()
                self._retreat(index)

        if not self._match(TokenType.L_PAREN):
            return this

        args = self._parse_csv(
            lambda: self._parse_constraint()
            or self._parse_column_def(self._parse_field(any_token=True))
        )

        self._match_r_paren()
        return self.expression(exp.Schema, this=this, expressions=args)

    def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse an optional type and constraint list after a column name.

        Returns `this` unchanged if neither a type nor constraints follow.
        """
        # column defs are not really columns, they're identifiers
        if isinstance(this, exp.Column):
            this = this.this

        kind = self._parse_types(schema=True)

        if self._match_text_seq("FOR", "ORDINALITY"):
            return self.expression(exp.ColumnDef, this=this, ordinality=True)

        constraints = []
        while True:
            constraint = self._parse_column_constraint()
            if not constraint:
                break
            constraints.append(constraint)

        if not kind and not constraints:
            return this

        return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints)

    def _parse_auto_increment(
        self,
    ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint:
        """Parse AUTOINCREMENT, optionally with (start, increment) or
        START ... INCREMENT ... arguments."""
        start = None
        increment = None

        if self._match(TokenType.L_PAREN, advance=False):
            args = self._parse_wrapped_csv(self._parse_bitwise)
            start = seq_get(args, 0)
            increment = seq_get(args, 1)
        elif self._match_text_seq("START"):
            start = self._parse_bitwise()
            self._match_text_seq("INCREMENT")
            increment = self._parse_bitwise()

        if start and increment:
            return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment)

        return exp.AutoIncrementColumnConstraint()

    def _parse_compress(self) -> exp.CompressColumnConstraint:
        """Parse a COMPRESS column constraint, with either a wrapped list or a single value."""
        if self._match(TokenType.L_PAREN, advance=False):
            return self.expression(
                exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise)
            )

        return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise())

    def _parse_generated_as_identity(self) -> exp.GeneratedAsIdentityColumnConstraint:
        """Parse GENERATED {ALWAYS | BY DEFAULT [ON NULL]} AS [IDENTITY] with its
        optional (START WITH / INCREMENT BY / MINVALUE / MAXVALUE / CYCLE) options."""
        if self._match_text_seq("BY", "DEFAULT"):
            on_null = self._match_pair(TokenType.ON, TokenType.NULL)
            this = self.expression(
                exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null
            )
        else:
            self._match_text_seq("ALWAYS")
            this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True)

        self._match(TokenType.ALIAS)
        identity = self._match_text_seq("IDENTITY")

        if self._match(TokenType.L_PAREN):
            if self._match_text_seq("START", "WITH"):
                this.set("start", self._parse_bitwise())
            if self._match_text_seq("INCREMENT", "BY"):
                this.set("increment", self._parse_bitwise())
            if self._match_text_seq("MINVALUE"):
                this.set("minvalue", self._parse_bitwise())
            if self._match_text_seq("MAXVALUE"):
                this.set("maxvalue", self._parse_bitwise())

            if self._match_text_seq("CYCLE"):
                this.set("cycle", True)
            elif self._match_text_seq("NO", "CYCLE"):
                this.set("cycle", False)

            if not identity:
                # GENERATED ... AS (expr): a computed column, not an identity.
                this.set("expression", self._parse_bitwise())

            self._match_r_paren()

        return this

    def _parse_inline(self) -> exp.InlineLengthColumnConstraint:
        """Parse an INLINE [LENGTH] column constraint."""
        self._match_text_seq("LENGTH")
        return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise())

    def _parse_not_constraint(
        self,
    ) -> t.Optional[exp.NotNullColumnConstraint | exp.CaseSpecificColumnConstraint]:
        """Parse the tail of a NOT constraint: NULL or CASESPECIFIC."""
        if self._match_text_seq("NULL"):
            return self.expression(exp.NotNullColumnConstraint)
        if self._match_text_seq("CASESPECIFIC"):
            return self.expression(exp.CaseSpecificColumnConstraint, not_=True)
        return None

    def _parse_column_constraint(self) -> t.Optional[exp.Expression]:
        """Parse one (optionally named) column constraint via CONSTRAINT_PARSERS."""
        if self._match(TokenType.CONSTRAINT):
            this = self._parse_id_var()
        else:
            this = None

        if self._match_texts(self.CONSTRAINT_PARSERS):
            return self.expression(
                exp.ColumnConstraint,
                this=this,
                kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self),
            )

        return this

    def _parse_constraint(self) -> t.Optional[exp.Expression]:
        """Parse a table-level constraint; unnamed constraints are delegated to
        _parse_unnamed_constraint."""
        if not self._match(TokenType.CONSTRAINT):
            return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS)

        this = self._parse_id_var()
        expressions = []

        while True:
            constraint = self._parse_unnamed_constraint() or self._parse_function()
            if not constraint:
                break
            expressions.append(constraint)

return self.expression(exp.Constraint, this=this, expressions=expressions) 3483 3484 def _parse_unnamed_constraint( 3485 self, constraints: t.Optional[t.Collection[str]] = None 3486 ) -> t.Optional[exp.Expression]: 3487 if not self._match_texts(constraints or self.CONSTRAINT_PARSERS): 3488 return None 3489 3490 constraint = self._prev.text.upper() 3491 if constraint not in self.CONSTRAINT_PARSERS: 3492 self.raise_error(f"No parser found for schema constraint {constraint}.") 3493 3494 return self.CONSTRAINT_PARSERS[constraint](self) 3495 3496 def _parse_unique(self) -> exp.UniqueColumnConstraint: 3497 self._match_text_seq("KEY") 3498 return self.expression( 3499 exp.UniqueColumnConstraint, this=self._parse_schema(self._parse_id_var(any_token=False)) 3500 ) 3501 3502 def _parse_key_constraint_options(self) -> t.List[str]: 3503 options = [] 3504 while True: 3505 if not self._curr: 3506 break 3507 3508 if self._match(TokenType.ON): 3509 action = None 3510 on = self._advance_any() and self._prev.text 3511 3512 if self._match_text_seq("NO", "ACTION"): 3513 action = "NO ACTION" 3514 elif self._match_text_seq("CASCADE"): 3515 action = "CASCADE" 3516 elif self._match_pair(TokenType.SET, TokenType.NULL): 3517 action = "SET NULL" 3518 elif self._match_pair(TokenType.SET, TokenType.DEFAULT): 3519 action = "SET DEFAULT" 3520 else: 3521 self.raise_error("Invalid key constraint") 3522 3523 options.append(f"ON {on} {action}") 3524 elif self._match_text_seq("NOT", "ENFORCED"): 3525 options.append("NOT ENFORCED") 3526 elif self._match_text_seq("DEFERRABLE"): 3527 options.append("DEFERRABLE") 3528 elif self._match_text_seq("INITIALLY", "DEFERRED"): 3529 options.append("INITIALLY DEFERRED") 3530 elif self._match_text_seq("NORELY"): 3531 options.append("NORELY") 3532 elif self._match_text_seq("MATCH", "FULL"): 3533 options.append("MATCH FULL") 3534 else: 3535 break 3536 3537 return options 3538 3539 def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]: 3540 if 
match and not self._match(TokenType.REFERENCES): 3541 return None 3542 3543 expressions = None 3544 this = self._parse_id_var() 3545 3546 if self._match(TokenType.L_PAREN, advance=False): 3547 expressions = self._parse_wrapped_id_vars() 3548 3549 options = self._parse_key_constraint_options() 3550 return self.expression(exp.Reference, this=this, expressions=expressions, options=options) 3551 3552 def _parse_foreign_key(self) -> exp.ForeignKey: 3553 expressions = self._parse_wrapped_id_vars() 3554 reference = self._parse_references() 3555 options = {} 3556 3557 while self._match(TokenType.ON): 3558 if not self._match_set((TokenType.DELETE, TokenType.UPDATE)): 3559 self.raise_error("Expected DELETE or UPDATE") 3560 3561 kind = self._prev.text.lower() 3562 3563 if self._match_text_seq("NO", "ACTION"): 3564 action = "NO ACTION" 3565 elif self._match(TokenType.SET): 3566 self._match_set((TokenType.NULL, TokenType.DEFAULT)) 3567 action = "SET " + self._prev.text.upper() 3568 else: 3569 self._advance() 3570 action = self._prev.text.upper() 3571 3572 options[kind] = action 3573 3574 return self.expression( 3575 exp.ForeignKey, expressions=expressions, reference=reference, **options # type: ignore 3576 ) 3577 3578 def _parse_primary_key( 3579 self, wrapped_optional: bool = False, in_props: bool = False 3580 ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey: 3581 desc = ( 3582 self._match_set((TokenType.ASC, TokenType.DESC)) 3583 and self._prev.token_type == TokenType.DESC 3584 ) 3585 3586 if not in_props and not self._match(TokenType.L_PAREN, advance=False): 3587 return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc) 3588 3589 expressions = self._parse_wrapped_csv(self._parse_field, optional=wrapped_optional) 3590 options = self._parse_key_constraint_options() 3591 return self.expression(exp.PrimaryKey, expressions=expressions, options=options) 3592 3593 def _parse_bracket(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3594 if not 
self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)): 3595 return this 3596 3597 bracket_kind = self._prev.token_type 3598 3599 if self._match(TokenType.COLON): 3600 expressions: t.List[t.Optional[exp.Expression]] = [ 3601 self.expression(exp.Slice, expression=self._parse_conjunction()) 3602 ] 3603 else: 3604 expressions = self._parse_csv(lambda: self._parse_slice(self._parse_conjunction())) 3605 3606 # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs 3607 if bracket_kind == TokenType.L_BRACE: 3608 this = self.expression(exp.Struct, expressions=expressions) 3609 elif not this or this.name.upper() == "ARRAY": 3610 this = self.expression(exp.Array, expressions=expressions) 3611 else: 3612 expressions = apply_index_offset(this, expressions, -self.INDEX_OFFSET) 3613 this = self.expression(exp.Bracket, this=this, expressions=expressions) 3614 3615 if not self._match(TokenType.R_BRACKET) and bracket_kind == TokenType.L_BRACKET: 3616 self.raise_error("Expected ]") 3617 elif not self._match(TokenType.R_BRACE) and bracket_kind == TokenType.L_BRACE: 3618 self.raise_error("Expected }") 3619 3620 self._add_comments(this) 3621 return self._parse_bracket(this) 3622 3623 def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3624 if self._match(TokenType.COLON): 3625 return self.expression(exp.Slice, this=this, expression=self._parse_conjunction()) 3626 return this 3627 3628 def _parse_case(self) -> t.Optional[exp.Expression]: 3629 ifs = [] 3630 default = None 3631 3632 expression = self._parse_conjunction() 3633 3634 while self._match(TokenType.WHEN): 3635 this = self._parse_conjunction() 3636 self._match(TokenType.THEN) 3637 then = self._parse_conjunction() 3638 ifs.append(self.expression(exp.If, this=this, true=then)) 3639 3640 if self._match(TokenType.ELSE): 3641 default = self._parse_conjunction() 3642 3643 if not self._match(TokenType.END): 3644 self.raise_error("Expected END after CASE", self._prev) 3645 3646 return 
self._parse_window( 3647 self.expression(exp.Case, this=expression, ifs=ifs, default=default) 3648 ) 3649 3650 def _parse_if(self) -> t.Optional[exp.Expression]: 3651 if self._match(TokenType.L_PAREN): 3652 args = self._parse_csv(self._parse_conjunction) 3653 this = self.validate_expression(exp.If.from_arg_list(args), args) 3654 self._match_r_paren() 3655 else: 3656 index = self._index - 1 3657 condition = self._parse_conjunction() 3658 3659 if not condition: 3660 self._retreat(index) 3661 return None 3662 3663 self._match(TokenType.THEN) 3664 true = self._parse_conjunction() 3665 false = self._parse_conjunction() if self._match(TokenType.ELSE) else None 3666 self._match(TokenType.END) 3667 this = self.expression(exp.If, this=condition, true=true, false=false) 3668 3669 return self._parse_window(this) 3670 3671 def _parse_extract(self) -> exp.Extract: 3672 this = self._parse_function() or self._parse_var() or self._parse_type() 3673 3674 if self._match(TokenType.FROM): 3675 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 3676 3677 if not self._match(TokenType.COMMA): 3678 self.raise_error("Expected FROM or comma after EXTRACT", self._prev) 3679 3680 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 3681 3682 def _parse_cast(self, strict: bool) -> exp.Expression: 3683 this = self._parse_conjunction() 3684 3685 if not self._match(TokenType.ALIAS): 3686 if self._match(TokenType.COMMA): 3687 return self.expression( 3688 exp.CastToStrType, this=this, expression=self._parse_string() 3689 ) 3690 else: 3691 self.raise_error("Expected AS after CAST") 3692 3693 to = self._parse_types() 3694 3695 if not to: 3696 self.raise_error("Expected TYPE after CAST") 3697 elif to.this == exp.DataType.Type.CHAR: 3698 if self._match(TokenType.CHARACTER_SET): 3699 to = self.expression(exp.CharacterSet, this=self._parse_var_or_string()) 3700 elif to.this in exp.DataType.TEMPORAL_TYPES and self._match(TokenType.FORMAT): 3701 fmt 
= self._parse_string() 3702 3703 return self.expression( 3704 exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime, 3705 this=this, 3706 format=exp.Literal.string( 3707 format_time( 3708 fmt.this if fmt else "", 3709 self.FORMAT_MAPPING or self.TIME_MAPPING, 3710 self.FORMAT_TRIE or self.TIME_TRIE, 3711 ) 3712 ), 3713 ) 3714 3715 return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to) 3716 3717 def _parse_concat(self) -> t.Optional[exp.Expression]: 3718 args = self._parse_csv(self._parse_conjunction) 3719 if self.CONCAT_NULL_OUTPUTS_STRING: 3720 args = [ 3721 exp.func("COALESCE", exp.cast(arg, "text"), exp.Literal.string("")) 3722 for arg in args 3723 if arg 3724 ] 3725 3726 # Some dialects (e.g. Trino) don't allow a single-argument CONCAT call, so when 3727 # we find such a call we replace it with its argument. 3728 if len(args) == 1: 3729 return args[0] 3730 3731 return self.expression( 3732 exp.Concat if self.STRICT_STRING_CONCAT else exp.SafeConcat, expressions=args 3733 ) 3734 3735 def _parse_string_agg(self) -> exp.Expression: 3736 if self._match(TokenType.DISTINCT): 3737 args: t.List[t.Optional[exp.Expression]] = [ 3738 self.expression(exp.Distinct, expressions=[self._parse_conjunction()]) 3739 ] 3740 if self._match(TokenType.COMMA): 3741 args.extend(self._parse_csv(self._parse_conjunction)) 3742 else: 3743 args = self._parse_csv(self._parse_conjunction) 3744 3745 index = self._index 3746 if not self._match(TokenType.R_PAREN): 3747 # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]]) 3748 return self.expression( 3749 exp.GroupConcat, 3750 this=seq_get(args, 0), 3751 separator=self._parse_order(this=seq_get(args, 1)), 3752 ) 3753 3754 # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]). 
3755 # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that 3756 # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them. 3757 if not self._match_text_seq("WITHIN", "GROUP"): 3758 self._retreat(index) 3759 return self.validate_expression(exp.GroupConcat.from_arg_list(args), args) 3760 3761 self._match_l_paren() # The corresponding match_r_paren will be called in parse_function (caller) 3762 order = self._parse_order(this=seq_get(args, 0)) 3763 return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1)) 3764 3765 def _parse_convert(self, strict: bool) -> t.Optional[exp.Expression]: 3766 this = self._parse_bitwise() 3767 3768 if self._match(TokenType.USING): 3769 to: t.Optional[exp.Expression] = self.expression( 3770 exp.CharacterSet, this=self._parse_var() 3771 ) 3772 elif self._match(TokenType.COMMA): 3773 to = self._parse_types() 3774 else: 3775 to = None 3776 3777 return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to) 3778 3779 def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]: 3780 """ 3781 There are generally two variants of the DECODE function: 3782 3783 - DECODE(bin, charset) 3784 - DECODE(expression, search, result [, search, result] ... [, default]) 3785 3786 The second variant will always be parsed into a CASE expression. Note that NULL 3787 needs special treatment, since we need to explicitly check for it with `IS NULL`, 3788 instead of relying on pattern matching. 
3789 """ 3790 args = self._parse_csv(self._parse_conjunction) 3791 3792 if len(args) < 3: 3793 return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1)) 3794 3795 expression, *expressions = args 3796 if not expression: 3797 return None 3798 3799 ifs = [] 3800 for search, result in zip(expressions[::2], expressions[1::2]): 3801 if not search or not result: 3802 return None 3803 3804 if isinstance(search, exp.Literal): 3805 ifs.append( 3806 exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result) 3807 ) 3808 elif isinstance(search, exp.Null): 3809 ifs.append( 3810 exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result) 3811 ) 3812 else: 3813 cond = exp.or_( 3814 exp.EQ(this=expression.copy(), expression=search), 3815 exp.and_( 3816 exp.Is(this=expression.copy(), expression=exp.Null()), 3817 exp.Is(this=search.copy(), expression=exp.Null()), 3818 copy=False, 3819 ), 3820 copy=False, 3821 ) 3822 ifs.append(exp.If(this=cond, true=result)) 3823 3824 return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None) 3825 3826 def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]: 3827 self._match_text_seq("KEY") 3828 key = self._parse_field() 3829 self._match(TokenType.COLON) 3830 self._match_text_seq("VALUE") 3831 value = self._parse_field() 3832 3833 if not key and not value: 3834 return None 3835 return self.expression(exp.JSONKeyValue, this=key, expression=value) 3836 3837 def _parse_json_object(self) -> exp.JSONObject: 3838 star = self._parse_star() 3839 expressions = [star] if star else self._parse_csv(self._parse_json_key_value) 3840 3841 null_handling = None 3842 if self._match_text_seq("NULL", "ON", "NULL"): 3843 null_handling = "NULL ON NULL" 3844 elif self._match_text_seq("ABSENT", "ON", "NULL"): 3845 null_handling = "ABSENT ON NULL" 3846 3847 unique_keys = None 3848 if self._match_text_seq("WITH", "UNIQUE"): 3849 unique_keys = True 3850 elif 
self._match_text_seq("WITHOUT", "UNIQUE"): 3851 unique_keys = False 3852 3853 self._match_text_seq("KEYS") 3854 3855 return_type = self._match_text_seq("RETURNING") and self._parse_type() 3856 format_json = self._match_text_seq("FORMAT", "JSON") 3857 encoding = self._match_text_seq("ENCODING") and self._parse_var() 3858 3859 return self.expression( 3860 exp.JSONObject, 3861 expressions=expressions, 3862 null_handling=null_handling, 3863 unique_keys=unique_keys, 3864 return_type=return_type, 3865 format_json=format_json, 3866 encoding=encoding, 3867 ) 3868 3869 def _parse_logarithm(self) -> exp.Func: 3870 # Default argument order is base, expression 3871 args = self._parse_csv(self._parse_range) 3872 3873 if len(args) > 1: 3874 if not self.LOG_BASE_FIRST: 3875 args.reverse() 3876 return exp.Log.from_arg_list(args) 3877 3878 return self.expression( 3879 exp.Ln if self.LOG_DEFAULTS_TO_LN else exp.Log, this=seq_get(args, 0) 3880 ) 3881 3882 def _parse_match_against(self) -> exp.MatchAgainst: 3883 expressions = self._parse_csv(self._parse_column) 3884 3885 self._match_text_seq(")", "AGAINST", "(") 3886 3887 this = self._parse_string() 3888 3889 if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"): 3890 modifier = "IN NATURAL LANGUAGE MODE" 3891 if self._match_text_seq("WITH", "QUERY", "EXPANSION"): 3892 modifier = f"{modifier} WITH QUERY EXPANSION" 3893 elif self._match_text_seq("IN", "BOOLEAN", "MODE"): 3894 modifier = "IN BOOLEAN MODE" 3895 elif self._match_text_seq("WITH", "QUERY", "EXPANSION"): 3896 modifier = "WITH QUERY EXPANSION" 3897 else: 3898 modifier = None 3899 3900 return self.expression( 3901 exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier 3902 ) 3903 3904 # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16 3905 def _parse_open_json(self) -> exp.OpenJSON: 3906 this = self._parse_bitwise() 3907 path = self._match(TokenType.COMMA) and self._parse_string() 3908 3909 def 
_parse_open_json_column_def() -> exp.OpenJSONColumnDef: 3910 this = self._parse_field(any_token=True) 3911 kind = self._parse_types() 3912 path = self._parse_string() 3913 as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON) 3914 3915 return self.expression( 3916 exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json 3917 ) 3918 3919 expressions = None 3920 if self._match_pair(TokenType.R_PAREN, TokenType.WITH): 3921 self._match_l_paren() 3922 expressions = self._parse_csv(_parse_open_json_column_def) 3923 3924 return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions) 3925 3926 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 3927 args = self._parse_csv(self._parse_bitwise) 3928 3929 if self._match(TokenType.IN): 3930 return self.expression( 3931 exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0) 3932 ) 3933 3934 if haystack_first: 3935 haystack = seq_get(args, 0) 3936 needle = seq_get(args, 1) 3937 else: 3938 needle = seq_get(args, 0) 3939 haystack = seq_get(args, 1) 3940 3941 return self.expression( 3942 exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2) 3943 ) 3944 3945 def _parse_join_hint(self, func_name: str) -> exp.JoinHint: 3946 args = self._parse_csv(self._parse_table) 3947 return exp.JoinHint(this=func_name.upper(), expressions=args) 3948 3949 def _parse_substring(self) -> exp.Substring: 3950 # Postgres supports the form: substring(string [from int] [for int]) 3951 # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6 3952 3953 args = self._parse_csv(self._parse_bitwise) 3954 3955 if self._match(TokenType.FROM): 3956 args.append(self._parse_bitwise()) 3957 if self._match(TokenType.FOR): 3958 args.append(self._parse_bitwise()) 3959 3960 return self.validate_expression(exp.Substring.from_arg_list(args), args) 3961 3962 def _parse_trim(self) -> exp.Trim: 3963 # https://www.w3resource.com/sql/character-functions/trim.php 3964 
# https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html 3965 3966 position = None 3967 collation = None 3968 3969 if self._match_texts(self.TRIM_TYPES): 3970 position = self._prev.text.upper() 3971 3972 expression = self._parse_bitwise() 3973 if self._match_set((TokenType.FROM, TokenType.COMMA)): 3974 this = self._parse_bitwise() 3975 else: 3976 this = expression 3977 expression = None 3978 3979 if self._match(TokenType.COLLATE): 3980 collation = self._parse_bitwise() 3981 3982 return self.expression( 3983 exp.Trim, this=this, position=position, expression=expression, collation=collation 3984 ) 3985 3986 def _parse_window_clause(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]: 3987 return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window) 3988 3989 def _parse_named_window(self) -> t.Optional[exp.Expression]: 3990 return self._parse_window(self._parse_id_var(), alias=True) 3991 3992 def _parse_respect_or_ignore_nulls( 3993 self, this: t.Optional[exp.Expression] 3994 ) -> t.Optional[exp.Expression]: 3995 if self._match_text_seq("IGNORE", "NULLS"): 3996 return self.expression(exp.IgnoreNulls, this=this) 3997 if self._match_text_seq("RESPECT", "NULLS"): 3998 return self.expression(exp.RespectNulls, this=this) 3999 return this 4000 4001 def _parse_window( 4002 self, this: t.Optional[exp.Expression], alias: bool = False 4003 ) -> t.Optional[exp.Expression]: 4004 if self._match_pair(TokenType.FILTER, TokenType.L_PAREN): 4005 this = self.expression(exp.Filter, this=this, expression=self._parse_where()) 4006 self._match_r_paren() 4007 4008 # T-SQL allows the OVER (...) syntax after WITHIN GROUP. 
4009 # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16 4010 if self._match_text_seq("WITHIN", "GROUP"): 4011 order = self._parse_wrapped(self._parse_order) 4012 this = self.expression(exp.WithinGroup, this=this, expression=order) 4013 4014 # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER 4015 # Some dialects choose to implement and some do not. 4016 # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html 4017 4018 # There is some code above in _parse_lambda that handles 4019 # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ... 4020 4021 # The below changes handle 4022 # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ... 4023 4024 # Oracle allows both formats 4025 # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html) 4026 # and Snowflake chose to do the same for familiarity 4027 # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes 4028 this = self._parse_respect_or_ignore_nulls(this) 4029 4030 # bigquery select from window x AS (partition by ...) 
4031 if alias: 4032 over = None 4033 self._match(TokenType.ALIAS) 4034 elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS): 4035 return this 4036 else: 4037 over = self._prev.text.upper() 4038 4039 if not self._match(TokenType.L_PAREN): 4040 return self.expression( 4041 exp.Window, this=this, alias=self._parse_id_var(False), over=over 4042 ) 4043 4044 window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS) 4045 4046 first = self._match(TokenType.FIRST) 4047 if self._match_text_seq("LAST"): 4048 first = False 4049 4050 partition = self._parse_partition_by() 4051 order = self._parse_order() 4052 kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text 4053 4054 if kind: 4055 self._match(TokenType.BETWEEN) 4056 start = self._parse_window_spec() 4057 self._match(TokenType.AND) 4058 end = self._parse_window_spec() 4059 4060 spec = self.expression( 4061 exp.WindowSpec, 4062 kind=kind, 4063 start=start["value"], 4064 start_side=start["side"], 4065 end=end["value"], 4066 end_side=end["side"], 4067 ) 4068 else: 4069 spec = None 4070 4071 self._match_r_paren() 4072 4073 return self.expression( 4074 exp.Window, 4075 this=this, 4076 partition_by=partition, 4077 order=order, 4078 spec=spec, 4079 alias=window_alias, 4080 over=over, 4081 first=first, 4082 ) 4083 4084 def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]: 4085 self._match(TokenType.BETWEEN) 4086 4087 return { 4088 "value": ( 4089 (self._match_text_seq("UNBOUNDED") and "UNBOUNDED") 4090 or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW") 4091 or self._parse_bitwise() 4092 ), 4093 "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text, 4094 } 4095 4096 def _parse_alias( 4097 self, this: t.Optional[exp.Expression], explicit: bool = False 4098 ) -> t.Optional[exp.Expression]: 4099 any_token = self._match(TokenType.ALIAS) 4100 4101 if explicit and not any_token: 4102 return this 4103 4104 if 
self._match(TokenType.L_PAREN): 4105 aliases = self.expression( 4106 exp.Aliases, 4107 this=this, 4108 expressions=self._parse_csv(lambda: self._parse_id_var(any_token)), 4109 ) 4110 self._match_r_paren(aliases) 4111 return aliases 4112 4113 alias = self._parse_id_var(any_token) 4114 4115 if alias: 4116 return self.expression(exp.Alias, this=this, alias=alias) 4117 4118 return this 4119 4120 def _parse_id_var( 4121 self, 4122 any_token: bool = True, 4123 tokens: t.Optional[t.Collection[TokenType]] = None, 4124 ) -> t.Optional[exp.Expression]: 4125 identifier = self._parse_identifier() 4126 4127 if identifier: 4128 return identifier 4129 4130 if (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS): 4131 quoted = self._prev.token_type == TokenType.STRING 4132 return exp.Identifier(this=self._prev.text, quoted=quoted) 4133 4134 return None 4135 4136 def _parse_string(self) -> t.Optional[exp.Expression]: 4137 if self._match(TokenType.STRING): 4138 return self.PRIMARY_PARSERS[TokenType.STRING](self, self._prev) 4139 return self._parse_placeholder() 4140 4141 def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]: 4142 return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True) 4143 4144 def _parse_number(self) -> t.Optional[exp.Expression]: 4145 if self._match(TokenType.NUMBER): 4146 return self.PRIMARY_PARSERS[TokenType.NUMBER](self, self._prev) 4147 return self._parse_placeholder() 4148 4149 def _parse_identifier(self) -> t.Optional[exp.Expression]: 4150 if self._match(TokenType.IDENTIFIER): 4151 return self.expression(exp.Identifier, this=self._prev.text, quoted=True) 4152 return self._parse_placeholder() 4153 4154 def _parse_var( 4155 self, any_token: bool = False, tokens: t.Optional[t.Collection[TokenType]] = None 4156 ) -> t.Optional[exp.Expression]: 4157 if ( 4158 (any_token and self._advance_any()) 4159 or self._match(TokenType.VAR) 4160 or (self._match_set(tokens) if tokens else False) 
        ):
            return self.expression(exp.Var, this=self._prev.text)
        return self._parse_placeholder()

    def _advance_any(self) -> t.Optional[Token]:
        """Consume and return the current token unless it is a reserved keyword."""
        if self._curr and self._curr.token_type not in self.RESERVED_KEYWORDS:
            self._advance()
            return self._prev
        return None

    def _parse_var_or_string(self) -> t.Optional[exp.Expression]:
        """Parse a variable, falling back to a string literal."""
        return self._parse_var() or self._parse_string()

    def _parse_null(self) -> t.Optional[exp.Expression]:
        """Parse a NULL literal via the registered primary parser."""
        if self._match(TokenType.NULL):
            return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev)
        return None

    def _parse_boolean(self) -> t.Optional[exp.Expression]:
        """Parse a TRUE/FALSE literal via the registered primary parsers."""
        if self._match(TokenType.TRUE):
            return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev)
        if self._match(TokenType.FALSE):
            return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev)
        return None

    def _parse_star(self) -> t.Optional[exp.Expression]:
        """Parse a star (*) projection via the registered primary parser."""
        if self._match(TokenType.STAR):
            return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev)
        return None

    def _parse_parameter(self) -> exp.Parameter:
        """Parse a parameter reference, optionally wrapped in braces."""
        wrapped = self._match(TokenType.L_BRACE)
        this = self._parse_var() or self._parse_identifier() or self._parse_primary()
        # The closing brace is only consumed if present; no error when missing.
        self._match(TokenType.R_BRACE)
        return self.expression(exp.Parameter, this=this, wrapped=wrapped)

    def _parse_placeholder(self) -> t.Optional[exp.Expression]:
        """Parse a placeholder token; backtrack when the sub-parser produces nothing."""
        if self._match_set(self.PLACEHOLDER_PARSERS):
            placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self)
            if placeholder:
                return placeholder
            # Sub-parser failed: undo the token we consumed above.
            self._advance(-1)
        return None

    def _parse_except(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]:
        """Parse an EXCEPT column list, with or without parentheses."""
        if not self._match(TokenType.EXCEPT):
            return None
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_column)
        return self._parse_csv(self._parse_column)

    def _parse_replace(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]:
        """Parse a REPLACE expression list, with or without parentheses."""
        if not self._match(TokenType.REPLACE):
            return None
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_expression)
        return self._parse_csv(self._parse_expression)

    def _parse_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA
    ) -> t.List[t.Optional[exp.Expression]]:
        """Parse a `sep`-separated list of items produced by `parse_method`."""
        parse_result = parse_method()
        items = [parse_result] if parse_result is not None else []

        while self._match(sep):
            # Comments found at the separator attach to the preceding item.
            self._add_comments(parse_result)
            parse_result = parse_method()
            if parse_result is not None:
                items.append(parse_result)

        return items

    def _parse_tokens(
        self, parse_method: t.Callable, expressions: t.Dict
    ) -> t.Optional[exp.Expression]:
        """Left-fold binary operators from `expressions` over operands from `parse_method`."""
        this = parse_method()

        while self._match_set(expressions):
            this = self.expression(
                expressions[self._prev.token_type],
                this=this,
                comments=self._prev_comments,
                expression=parse_method(),
            )

        return this

    def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[t.Optional[exp.Expression]]:
        """Parse a parenthesized, comma-separated list of identifiers."""
        return self._parse_wrapped_csv(self._parse_id_var, optional=optional)

    def _parse_wrapped_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False
    ) -> t.List[t.Optional[exp.Expression]]:
        """Parse a parenthesized `sep`-separated list; parens required unless `optional`."""
        return self._parse_wrapped(
            lambda: self._parse_csv(parse_method, sep=sep), optional=optional
        )

    def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any:
        """Run `parse_method` inside parentheses; raise if '(' is missing and not `optional`."""
        wrapped = self._match(TokenType.L_PAREN)
        if not wrapped and not optional:
            self.raise_error("Expecting (")
        parse_result = parse_method()
        if wrapped:
            self._match_r_paren()
        return parse_result

    def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]:
        """Parse a SELECT or, failing that, a (possibly aliased) expression."""
        return self._parse_select() or self._parse_set_operations(
            self._parse_expression() if alias else self._parse_conjunction()
        )

    def _parse_ddl_select(self) -> t.Optional[exp.Expression]:
        """Parse the SELECT part of a DDL statement (e.g. CREATE TABLE ... AS SELECT)."""
        return self._parse_query_modifiers(
            self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False))
        )

    def _parse_transaction(self) -> exp.Transaction:
        """Parse a transaction start statement along with any transaction modes."""
        this = None
        if self._match_texts(self.TRANSACTION_KIND):
            this = self._prev.text

        self._match_texts({"TRANSACTION", "WORK"})

        modes = []
        while True:
            mode = []
            # One mode may span several VAR tokens (e.g. "READ ONLY").
            while self._match(TokenType.VAR):
                mode.append(self._prev.text)

            if mode:
                modes.append(" ".join(mode))
            if not self._match(TokenType.COMMA):
                break

        return self.expression(exp.Transaction, this=this, modes=modes)

    def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback:
        """Parse COMMIT/ROLLBACK with optional savepoint and AND [NO] CHAIN."""
        chain = None
        savepoint = None
        is_rollback = self._prev.token_type == TokenType.ROLLBACK

        self._match_texts({"TRANSACTION", "WORK"})

        if self._match_text_seq("TO"):
            self._match_text_seq("SAVEPOINT")
            savepoint = self._parse_id_var()

        if self._match(TokenType.AND):
            chain = not self._match_text_seq("NO")
            self._match_text_seq("CHAIN")

        # NOTE(review): a parsed chain option is discarded for ROLLBACK and a parsed
        # savepoint is discarded for COMMIT — presumably each expression type only
        # supports its own argument; confirm against exp.Commit/exp.Rollback.
        if is_rollback:
            return self.expression(exp.Rollback, savepoint=savepoint)

        return self.expression(exp.Commit, chain=chain)

    def _parse_add_column(self) -> t.Optional[exp.Expression]:
        """Parse ADD [COLUMN] <column def> [FIRST | AFTER <col>] in an ALTER TABLE."""
        if not self._match_text_seq("ADD"):
            return None

        self._match(TokenType.COLUMN)
        exists_column = self._parse_exists(not_=True)
        expression = self._parse_column_def(self._parse_field(any_token=True))

        if expression:
            expression.set("exists", exists_column)

        # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns
        if self._match_texts(("FIRST", "AFTER")):
            position = self._prev.text
            column_position = self.expression(
                exp.ColumnPosition, this=self._parse_column(), position=position
            )
            expression.set("position", column_position)

        return expression

    def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]:
        """Parse a DROP action in an ALTER TABLE, defaulting its kind to COLUMN."""
        drop = self._match(TokenType.DROP) and self._parse_drop()
        if drop and not isinstance(drop, exp.Command):
            drop.set("kind", drop.args.get("kind", "COLUMN"))
        return drop

    # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html
    def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition:
        """Parse DROP [IF EXISTS] PARTITION ..., PARTITION ... in an ALTER TABLE."""
        return self.expression(
            exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists
        )

    def _parse_add_constraint(self) -> exp.AddConstraint:
        """Parse an added CONSTRAINT / FOREIGN KEY / PRIMARY KEY clause."""
        this = None
        kind = self._prev.token_type

        if kind == TokenType.CONSTRAINT:
            this = self._parse_id_var()

            if self._match_text_seq("CHECK"):
                expression = self._parse_wrapped(self._parse_conjunction)
                enforced = self._match_text_seq("ENFORCED")

                return self.expression(
                    exp.AddConstraint, this=this, expression=expression, enforced=enforced
                )

        if kind == TokenType.FOREIGN_KEY or self._match(TokenType.FOREIGN_KEY):
            expression = self._parse_foreign_key()
        elif kind == TokenType.PRIMARY_KEY or self._match(TokenType.PRIMARY_KEY):
            expression = self._parse_primary_key()
        else:
            expression = None

        return self.expression(exp.AddConstraint, this=this, expression=expression)

    def _parse_alter_table_add(self) -> t.List[t.Optional[exp.Expression]]:
        """Parse the ADD action of ALTER TABLE: constraints, else columns."""
        index = self._index - 1

        if self._match_set(self.ADD_CONSTRAINT_TOKENS):
            return self._parse_csv(self._parse_add_constraint)

        # Not a constraint: rewind and parse column additions instead.
        self._retreat(index)
        return self._parse_csv(self._parse_add_column)

    def _parse_alter_table_alter(self) -> exp.AlterColumn:
        """Parse ALTER [COLUMN] <name> {DROP DEFAULT | SET DEFAULT | [SET DATA] TYPE ...}."""
        self._match(TokenType.COLUMN)
        column = self._parse_field(any_token=True)

        if self._match_pair(TokenType.DROP, TokenType.DEFAULT):
            return self.expression(exp.AlterColumn, this=column, drop=True)
        if self._match_pair(TokenType.SET, TokenType.DEFAULT):
            return self.expression(exp.AlterColumn, this=column, default=self._parse_conjunction())

        self._match_text_seq("SET", "DATA")
        return self.expression(
            exp.AlterColumn,
            this=column,
            dtype=self._match_text_seq("TYPE") and self._parse_types(),
            collate=self._match(TokenType.COLLATE) and self._parse_term(),
            using=self._match(TokenType.USING) and self._parse_conjunction(),
        )

    def _parse_alter_table_drop(self) -> t.List[t.Optional[exp.Expression]]:
        """Parse the DROP action of ALTER TABLE: partitions, else columns."""
        index = self._index - 1

        partition_exists = self._parse_exists()
        if self._match(TokenType.PARTITION, advance=False):
            return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists))

        # Not a partition: rewind and parse column drops instead.
        self._retreat(index)
        return self._parse_csv(self._parse_drop_column)

    def _parse_alter_table_rename(self) -> exp.RenameTable:
        """Parse RENAME TO <table>."""
        self._match_text_seq("TO")
        return self.expression(exp.RenameTable, this=self._parse_table(schema=True))

    def _parse_alter(self) -> exp.AlterTable | exp.Command:
        """Parse ALTER TABLE; unknown or unconsumed syntax falls back to a raw Command."""
        start = self._prev

        if not self._match(TokenType.TABLE):
            return self._parse_as_command(start)

        exists = self._parse_exists()
        this = self._parse_table(schema=True)

        if self._next:
            self._advance()

        parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None
        if parser:
            actions = ensure_list(parser(self))

            # Only produce a structured AlterTable when every token was consumed.
            if not self._curr:
                return self.expression(
                    exp.AlterTable,
                    this=this,
                    exists=exists,
                    actions=actions,
                )
        return self._parse_as_command(start)

    def _parse_merge(self) -> exp.Merge:
        """Parse MERGE INTO <target> USING <source> ON <cond> WHEN ... THEN ..."""
        self._match(TokenType.INTO)
        target = self._parse_table()

        self._match(TokenType.USING)
        using = self._parse_table()

        self._match(TokenType.ON)
        on = self._parse_conjunction()

        whens = []
        while self._match(TokenType.WHEN):
            matched = not self._match(TokenType.NOT)
            self._match_text_seq("MATCHED")
            # True only for BY SOURCE; False otherwise (including BY TARGET).
            source = (
                False
                if self._match_text_seq("BY", "TARGET")
                else self._match_text_seq("BY", "SOURCE")
            )
            condition = self._parse_conjunction() if self._match(TokenType.AND) else None

            self._match(TokenType.THEN)

            if self._match(TokenType.INSERT):
                _this = self._parse_star()
                if _this:
                    then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=_this)
                else:
                    then = self.expression(
                        exp.Insert,
                        this=self._parse_value(),
                        expression=self._match(TokenType.VALUES) and self._parse_value(),
                    )
            elif self._match(TokenType.UPDATE):
                expressions = self._parse_star()
                if expressions:
                    then = self.expression(exp.Update, expressions=expressions)
                else:
                    then = self.expression(
                        exp.Update,
                        expressions=self._match(TokenType.SET)
                        and self._parse_csv(self._parse_equality),
                    )
            elif self._match(TokenType.DELETE):
                then = self.expression(exp.Var, this=self._prev.text)
            else:
                then = None

            whens.append(
                self.expression(
                    exp.When,
                    matched=matched,
                    source=source,
                    condition=condition,
                    then=then,
                )
            )

        return self.expression(
            exp.Merge,
            this=target,
            using=using,
            on=on,
            expressions=whens,
        )

    def _parse_show(self) -> t.Optional[exp.Expression]:
        """Parse SHOW via a registered sub-parser, or fall back to a generic Show node."""
        parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE)
        if parser:
            return parser(self)
        self._advance()
        return self.expression(exp.Show, this=self._prev.text.upper())

    def _parse_set_item_assignment(
        self, kind: t.Optional[str] = None
    ) -> t.Optional[exp.Expression]:
        """Parse one SET assignment (SET x = 1 / SET x TO 1); backtrack on failure."""
        index = self._index

        if kind in {"GLOBAL", "SESSION"} and self._match_text_seq("TRANSACTION"):
            return self._parse_set_transaction(global_=kind == "GLOBAL")

        left = self._parse_primary() or self._parse_id_var()

        if not self._match_texts(("=", "TO")):
            self._retreat(index)
            return None

        right = self._parse_statement() or self._parse_id_var()
        this = self.expression(exp.EQ, this=left, expression=right)

        return self.expression(exp.SetItem, this=this, kind=kind)

    def _parse_set_transaction(self, global_: bool = False) -> exp.Expression:
        """Parse SET [GLOBAL|SESSION] TRANSACTION <characteristic, ...>."""
        self._match_text_seq("TRANSACTION")
        characteristics = self._parse_csv(
            lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS)
        )
        return self.expression(
            exp.SetItem,
            expressions=characteristics,
            kind="TRANSACTION",
            # `global` is a Python keyword, so it must be passed via dict unpacking.
            **{"global": global_},  # type: ignore
        )

    def _parse_set_item(self) -> t.Optional[exp.Expression]:
        """Parse a single SET item via a registered sub-parser or a plain assignment."""
        parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE)
        return parser(self) if parser else self._parse_set_item_assignment(kind=None)

    def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command:
        """Parse SET/UNSET; unconsumed tokens downgrade the result to a raw Command."""
        index = self._index
        set_ = self.expression(
            exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag
        )

        if self._curr:
            self._retreat(index)
            return self._parse_as_command(self._prev)

        return set_

    def _parse_var_from_options(self, options: t.Collection[str]) -> t.Optional[exp.Var]:
        """Match one of `options` (each possibly multi-word) and return it as a Var."""
        for option in options:
            if self._match_text_seq(*option.split(" ")):
                return exp.var(option)
        return None

    def _parse_as_command(self, start: Token) -> exp.Command:
        """Consume all remaining tokens and wrap the raw SQL in a Command node."""
        while self._curr:
            self._advance()
        text = self._find_sql(start, self._prev)
        # The command keyword itself is the leading `start.text` characters of the SQL.
        size = len(start.text)
        return exp.Command(this=text[:size], expression=text[size:])

    def _parse_dict_property(self, this: str) -> exp.DictProperty:
        """Parse a dictionary property of the form KIND([key value, ...])."""
        settings = []

        self._match_l_paren()
        kind = self._parse_id_var()

        if self._match(TokenType.L_PAREN):
            while True:
                key = self._parse_id_var()
                value = self._parse_primary()

                # Stop once neither a key nor a value can be parsed.
                if not key and value is None:
                    break
                settings.append(self.expression(exp.DictSubProperty, this=key, value=value))
            self._match(TokenType.R_PAREN)

        self._match_r_paren()

        return self.expression(
            exp.DictProperty,
            this=this,
            kind=kind.this if kind else None,
            settings=settings,
        )

    def _parse_dict_range(self, this: str) -> exp.DictRange:
        """Parse a dictionary range: (MIN <value> MAX <value>) or (MAX <value>)."""
        self._match_l_paren()
        has_min = self._match_text_seq("MIN")
        if has_min:
            # NOTE(review): `min`/`max` shadow the builtins here; scope is local only.
            min = self._parse_var() or self._parse_primary()
            self._match_text_seq("MAX")
            max = self._parse_var() or self._parse_primary()
        else:
            max = self._parse_var() or self._parse_primary()
            # Without an explicit MIN, the range implicitly starts at 0.
            min = exp.Literal.number(0)
        self._match_r_paren()
        return self.expression(exp.DictRange, this=this, min=min, max=max)

    def _find_parser(
        self, parsers: t.Dict[str, t.Callable], trie: t.Dict
    ) -> t.Optional[t.Callable]:
        """Find the sub-parser for the keyword sequence at the current position.

        Walks `trie` token by token; restores the token position when no full
        key is matched.
        """
        if not self._curr:
            return None

        index = self._index
        this = []
        while True:
            # The current token might be multiple words
            curr = self._curr.text.upper()
            key = curr.split(" ")
            this.append(curr)

            self._advance()
            result, trie = in_trie(trie, key)
            if result == TrieResult.FAILED:
                break

            if result == TrieResult.EXISTS:
                subparser = parsers[" ".join(this)]
                return subparser

        self._retreat(index)
        return None

    def _match(self, token_type, advance=True, expression=None):
        """Consume the current token if its type is `token_type`.

        Returns True on a match (also attaching any pending comments to
        `expression`), otherwise None.
        """
        if not self._curr:
            return None

        if self._curr.token_type == token_type:
            if advance:
                self._advance()

            self._add_comments(expression)
            return True

        return None

    def _match_set(self, types, advance=True):
        """Consume the current token if its type is in `types`; True or None."""
        if not self._curr:
            return None

        if self._curr.token_type in types:
            if advance:
                self._advance()
            return True

        return None

    def _match_pair(self, token_type_a, token_type_b, advance=True):
        """Consume the next two tokens if they match the given pair; True or None."""
        if not self._curr or not self._next:
            return None

        if self._curr.token_type == token_type_a and self._next.token_type == token_type_b:
            if advance:
                self._advance(2)
            return True

        return None

    def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None:
        """Require and consume '(' or record/raise a parse error."""
        if not self._match(TokenType.L_PAREN, expression=expression):
            self.raise_error("Expecting (")

    def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None:
        """Require and consume ')' or record/raise a parse error."""
        if not self._match(TokenType.R_PAREN, expression=expression):
            self.raise_error("Expecting )")

    def _match_texts(self, texts, advance=True):
        """Consume the current token if its upper-cased text is in `texts`."""
        if self._curr and self._curr.text.upper() in texts:
            if advance:
                self._advance()
            return True
        return False

    def _match_text_seq(self, *texts, advance=True):
        """Match a sequence of upper-cased token texts; backtrack fully on failure."""
        index = self._index
        for text in texts:
            if self._curr and self._curr.text.upper() == text:
                self._advance()
            else:
                self._retreat(index)
                return False

        # Peek-only mode: report the match but restore the original position.
        if not advance:
            self._retreat(index)

        return True

    @t.overload
    def _replace_columns_with_dots(self, this: exp.Expression) -> exp.Expression:
        ...

    @t.overload
    def _replace_columns_with_dots(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        ...
4709 4710 def _replace_columns_with_dots(self, this): 4711 if isinstance(this, exp.Dot): 4712 exp.replace_children(this, self._replace_columns_with_dots) 4713 elif isinstance(this, exp.Column): 4714 exp.replace_children(this, self._replace_columns_with_dots) 4715 table = this.args.get("table") 4716 this = ( 4717 self.expression(exp.Dot, this=table, expression=this.this) 4718 if table 4719 else self.expression(exp.Var, this=this.name) 4720 ) 4721 elif isinstance(this, exp.Identifier): 4722 this = self.expression(exp.Var, this=this.name) 4723 4724 return this 4725 4726 def _replace_lambda( 4727 self, node: t.Optional[exp.Expression], lambda_variables: t.Set[str] 4728 ) -> t.Optional[exp.Expression]: 4729 if not node: 4730 return node 4731 4732 for column in node.find_all(exp.Column): 4733 if column.parts[0].name in lambda_variables: 4734 dot_or_id = column.to_dot() if column.table else column.this 4735 parent = column.parent 4736 4737 while isinstance(parent, exp.Dot): 4738 if not isinstance(parent.parent, exp.Dot): 4739 parent.replace(dot_or_id) 4740 break 4741 parent = parent.parent 4742 else: 4743 if column is node: 4744 node = dot_or_id 4745 else: 4746 column.replace(dot_or_id) 4747 return node
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: Determines the amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
832 def __init__( 833 self, 834 error_level: t.Optional[ErrorLevel] = None, 835 error_message_context: int = 100, 836 max_errors: int = 3, 837 ): 838 self.error_level = error_level or ErrorLevel.IMMEDIATE 839 self.error_message_context = error_message_context 840 self.max_errors = max_errors 841 self.reset()
853 def parse( 854 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 855 ) -> t.List[t.Optional[exp.Expression]]: 856 """ 857 Parses a list of tokens and returns a list of syntax trees, one tree 858 per parsed SQL statement. 859 860 Args: 861 raw_tokens: The list of tokens. 862 sql: The original SQL string, used to produce helpful debug messages. 863 864 Returns: 865 The list of the produced syntax trees. 866 """ 867 return self._parse( 868 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 869 )
Parses a list of tokens and returns a list of syntax trees, one tree per parsed SQL statement.
Arguments:
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The list of the produced syntax trees.
871 def parse_into( 872 self, 873 expression_types: exp.IntoType, 874 raw_tokens: t.List[Token], 875 sql: t.Optional[str] = None, 876 ) -> t.List[t.Optional[exp.Expression]]: 877 """ 878 Parses a list of tokens into a given Expression type. If a collection of Expression 879 types is given instead, this method will try to parse the token list into each one 880 of them, stopping at the first for which the parsing succeeds. 881 882 Args: 883 expression_types: The expression type(s) to try and parse the token list into. 884 raw_tokens: The list of tokens. 885 sql: The original SQL string, used to produce helpful debug messages. 886 887 Returns: 888 The target Expression. 889 """ 890 errors = [] 891 for expression_type in ensure_list(expression_types): 892 parser = self.EXPRESSION_PARSERS.get(expression_type) 893 if not parser: 894 raise TypeError(f"No parser registered for {expression_type}") 895 896 try: 897 return self._parse(parser, raw_tokens, sql) 898 except ParseError as e: 899 e.errors[0]["into_expression"] = expression_type 900 errors.append(e) 901 902 raise ParseError( 903 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 904 errors=merge_errors(errors), 905 ) from errors[-1]
Parses a list of tokens into a given Expression type. If a collection of Expression types is given instead, this method will try to parse the token list into each one of them, stopping at the first for which the parsing succeeds.
Arguments:
- expression_types: The expression type(s) to try and parse the token list into.
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The target Expression.
942 def check_errors(self) -> None: 943 """Logs or raises any found errors, depending on the chosen error level setting.""" 944 if self.error_level == ErrorLevel.WARN: 945 for error in self.errors: 946 logger.error(str(error)) 947 elif self.error_level == ErrorLevel.RAISE and self.errors: 948 raise ParseError( 949 concat_messages(self.errors, self.max_errors), 950 errors=merge_errors(self.errors), 951 )
Logs or raises any found errors, depending on the chosen error level setting.
953 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 954 """ 955 Appends an error in the list of recorded errors or raises it, depending on the chosen 956 error level setting. 957 """ 958 token = token or self._curr or self._prev or Token.string("") 959 start = token.start 960 end = token.end + 1 961 start_context = self.sql[max(start - self.error_message_context, 0) : start] 962 highlight = self.sql[start:end] 963 end_context = self.sql[end : end + self.error_message_context] 964 965 error = ParseError.new( 966 f"{message}. Line {token.line}, Col: {token.col}.\n" 967 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 968 description=message, 969 line=token.line, 970 col=token.col, 971 start_context=start_context, 972 highlight=highlight, 973 end_context=end_context, 974 ) 975 976 if self.error_level == ErrorLevel.IMMEDIATE: 977 raise error 978 979 self.errors.append(error)
Appends an error in the list of recorded errors or raises it, depending on the chosen error level setting.
981 def expression( 982 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 983 ) -> E: 984 """ 985 Creates a new, validated Expression. 986 987 Args: 988 exp_class: The expression class to instantiate. 989 comments: An optional list of comments to attach to the expression. 990 kwargs: The arguments to set for the expression along with their respective values. 991 992 Returns: 993 The target expression. 994 """ 995 instance = exp_class(**kwargs) 996 instance.add_comments(comments) if comments else self._add_comments(instance) 997 return self.validate_expression(instance)
Creates a new, validated Expression.
Arguments:
- exp_class: The expression class to instantiate.
- comments: An optional list of comments to attach to the expression.
- kwargs: The arguments to set for the expression along with their respective values.
Returns:
The target expression.
1004 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1005 """ 1006 Validates an Expression, making sure that all its mandatory arguments are set. 1007 1008 Args: 1009 expression: The expression to validate. 1010 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1011 1012 Returns: 1013 The validated expression. 1014 """ 1015 if self.error_level != ErrorLevel.IGNORE: 1016 for error_message in expression.error_messages(args): 1017 self.raise_error(error_message) 1018 1019 return expression
Validates an Expression, making sure that all its mandatory arguments are set.
Arguments:
- expression: The expression to validate.
- args: An optional list of items that was used to instantiate the expression, if it's a Func.
Returns:
The validated expression.