sqlglot.parser
from __future__ import annotations

import logging
import typing as t
from collections import defaultdict

from sqlglot import exp
from sqlglot.errors import ErrorLevel, ParseError, concat_messages, merge_errors
from sqlglot.helper import apply_index_offset, ensure_list, seq_get
from sqlglot.time import format_time
from sqlglot.tokens import Token, Tokenizer, TokenType
from sqlglot.trie import TrieResult, in_trie, new_trie

if t.TYPE_CHECKING:
    from sqlglot._typing import E

logger = logging.getLogger("sqlglot")


def parse_var_map(args: t.List) -> exp.StarMap | exp.VarMap:
    if len(args) == 1 and args[0].is_star:
        return exp.StarMap(this=args[0])

    keys = []
    values = []
    for i in range(0, len(args), 2):
        keys.append(args[i])
        values.append(args[i + 1])

    return exp.VarMap(
        keys=exp.Array(expressions=keys),
        values=exp.Array(expressions=values),
    )


def parse_like(args: t.List) -> exp.Escape | exp.Like:
    like = exp.Like(this=seq_get(args, 1), expression=seq_get(args, 0))
    return exp.Escape(this=like, expression=seq_get(args, 2)) if len(args) > 2 else like


def binary_range_parser(
    expr_type: t.Type[exp.Expression],
) -> t.Callable[[Parser, t.Optional[exp.Expression]], t.Optional[exp.Expression]]:
    return lambda self, this: self._parse_escape(
        self.expression(expr_type, this=this, expression=self._parse_bitwise())
    )
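
# A hedged, doctest-style sketch of the helpers above (illustrative only, not
# executed on import): parse_var_map pairs up alternating key/value arguments
# into a VarMap, while a single star argument becomes a StarMap.
#
#     >>> from sqlglot import exp
#     >>> from sqlglot.parser import parse_var_map
#     >>> node = parse_var_map([exp.Literal.string("a"), exp.Literal.number(1)])
#     >>> isinstance(node, exp.VarMap)
#     True
#     >>> isinstance(parse_var_map([exp.Star()]), exp.StarMap)
#     True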


class _Parser(type):
    def __new__(cls, clsname, bases, attrs):
        klass = super().__new__(cls, clsname, bases, attrs)

        klass.SHOW_TRIE = new_trie(key.split(" ") for key in klass.SHOW_PARSERS)
        klass.SET_TRIE = new_trie(key.split(" ") for key in klass.SET_PARSERS)

        return klass


class Parser(metaclass=_Parser):
    """
    Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.

    Args:
        error_level: The desired error level.
            Default: ErrorLevel.IMMEDIATE
        error_message_context: Determines the amount of context to capture from a
            query string when displaying the error message (in number of characters).
            Default: 100
        max_errors: Maximum number of error messages to include in a raised ParseError.
            This is only relevant if error_level is ErrorLevel.RAISE.
            Default: 3
    """

    FUNCTIONS: t.Dict[str, t.Callable] = {
        **{name: f.from_arg_list for f in exp.ALL_FUNCTIONS for name in f.sql_names()},
        "DATE_TO_DATE_STR": lambda args: exp.Cast(
            this=seq_get(args, 0),
            to=exp.DataType(this=exp.DataType.Type.TEXT),
        ),
        "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)),
        "LIKE": parse_like,
        "TIME_TO_TIME_STR": lambda args: exp.Cast(
            this=seq_get(args, 0),
            to=exp.DataType(this=exp.DataType.Type.TEXT),
        ),
        "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring(
            this=exp.Cast(
                this=seq_get(args, 0),
                to=exp.DataType(this=exp.DataType.Type.TEXT),
            ),
            start=exp.Literal.number(1),
            length=exp.Literal.number(10),
        ),
        "VAR_MAP": parse_var_map,
    }

    NO_PAREN_FUNCTIONS = {
        TokenType.CURRENT_DATE: exp.CurrentDate,
        TokenType.CURRENT_DATETIME: exp.CurrentDate,
        TokenType.CURRENT_TIME: exp.CurrentTime,
        TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp,
        TokenType.CURRENT_USER: exp.CurrentUser,
    }

    STRUCT_TYPE_TOKENS = {
        TokenType.NESTED,
        TokenType.STRUCT,
    }

    NESTED_TYPE_TOKENS = {
        TokenType.ARRAY,
        TokenType.LOWCARDINALITY,
        TokenType.MAP,
        TokenType.NULLABLE,
        *STRUCT_TYPE_TOKENS,
    }

    ENUM_TYPE_TOKENS = {
        TokenType.ENUM,
        TokenType.ENUM8,
        TokenType.ENUM16,
    }

    TYPE_TOKENS = {
        TokenType.BIT,
        TokenType.BOOLEAN,
        TokenType.TINYINT,
        TokenType.UTINYINT,
        TokenType.SMALLINT,
        TokenType.USMALLINT,
        TokenType.INT,
        TokenType.UINT,
        TokenType.BIGINT,
        TokenType.UBIGINT,
        TokenType.INT128,
        TokenType.UINT128,
        TokenType.INT256,
        TokenType.UINT256,
        TokenType.MEDIUMINT,
        TokenType.FIXEDSTRING,
        TokenType.FLOAT,
        TokenType.DOUBLE,
        TokenType.CHAR,
        TokenType.NCHAR,
        TokenType.VARCHAR,
        TokenType.NVARCHAR,
        TokenType.TEXT,
        TokenType.MEDIUMTEXT,
        TokenType.LONGTEXT,
        TokenType.MEDIUMBLOB,
        TokenType.LONGBLOB,
        TokenType.BINARY,
        TokenType.VARBINARY,
        TokenType.JSON,
        TokenType.JSONB,
        TokenType.INTERVAL,
        TokenType.TIME,
        TokenType.TIMETZ,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
        TokenType.DATETIME,
        TokenType.DATETIME64,
        TokenType.DATE,
        TokenType.INT4RANGE,
        TokenType.INT4MULTIRANGE,
        TokenType.INT8RANGE,
        TokenType.INT8MULTIRANGE,
        TokenType.NUMRANGE,
        TokenType.NUMMULTIRANGE,
        TokenType.TSRANGE,
        TokenType.TSMULTIRANGE,
        TokenType.TSTZRANGE,
        TokenType.TSTZMULTIRANGE,
        TokenType.DATERANGE,
        TokenType.DATEMULTIRANGE,
        TokenType.DECIMAL,
        TokenType.BIGDECIMAL,
        TokenType.UUID,
        TokenType.GEOGRAPHY,
        TokenType.GEOMETRY,
        TokenType.HLLSKETCH,
        TokenType.HSTORE,
        TokenType.PSEUDO_TYPE,
        TokenType.SUPER,
        TokenType.SERIAL,
        TokenType.SMALLSERIAL,
        TokenType.BIGSERIAL,
        TokenType.XML,
        TokenType.YEAR,
        TokenType.UNIQUEIDENTIFIER,
        TokenType.USERDEFINED,
        TokenType.MONEY,
        TokenType.SMALLMONEY,
        TokenType.ROWVERSION,
        TokenType.IMAGE,
        TokenType.VARIANT,
        TokenType.OBJECT,
        TokenType.INET,
        TokenType.IPADDRESS,
        TokenType.IPPREFIX,
        TokenType.UNKNOWN,
        TokenType.NULL,
        *ENUM_TYPE_TOKENS,
        *NESTED_TYPE_TOKENS,
    }

    SUBQUERY_PREDICATES = {
        TokenType.ANY: exp.Any,
        TokenType.ALL: exp.All,
        TokenType.EXISTS: exp.Exists,
        TokenType.SOME: exp.Any,
    }

    RESERVED_KEYWORDS = {
        *Tokenizer.SINGLE_TOKENS.values(),
        TokenType.SELECT,
    }

    DB_CREATABLES = {
        TokenType.DATABASE,
        TokenType.SCHEMA,
        TokenType.TABLE,
        TokenType.VIEW,
        TokenType.DICTIONARY,
    }

    CREATABLES = {
        TokenType.COLUMN,
        TokenType.FUNCTION,
        TokenType.INDEX,
        TokenType.PROCEDURE,
        *DB_CREATABLES,
    }

    # Tokens that can represent identifiers
    ID_VAR_TOKENS = {
        TokenType.VAR,
        TokenType.ANTI,
        TokenType.APPLY,
        TokenType.ASC,
        TokenType.AUTO_INCREMENT,
        TokenType.BEGIN,
        TokenType.CACHE,
        TokenType.CASE,
        TokenType.COLLATE,
        TokenType.COMMAND,
        TokenType.COMMENT,
        TokenType.COMMIT,
        TokenType.CONSTRAINT,
        TokenType.DEFAULT,
        TokenType.DELETE,
        TokenType.DESC,
        TokenType.DESCRIBE,
        TokenType.DICTIONARY,
        TokenType.DIV,
        TokenType.END,
        TokenType.EXECUTE,
        TokenType.ESCAPE,
        TokenType.FALSE,
        TokenType.FIRST,
        TokenType.FILTER,
        TokenType.FORMAT,
        TokenType.FULL,
        TokenType.IS,
        TokenType.ISNULL,
        TokenType.INTERVAL,
        TokenType.KEEP,
        TokenType.LEFT,
        TokenType.LOAD,
        TokenType.MERGE,
        TokenType.NATURAL,
        TokenType.NEXT,
        TokenType.OFFSET,
        TokenType.ORDINALITY,
        TokenType.OVERWRITE,
        TokenType.PARTITION,
        TokenType.PERCENT,
        TokenType.PIVOT,
        TokenType.PRAGMA,
        TokenType.RANGE,
        TokenType.REFERENCES,
        TokenType.RIGHT,
        TokenType.ROW,
        TokenType.ROWS,
        TokenType.SEMI,
        TokenType.SET,
        TokenType.SETTINGS,
        TokenType.SHOW,
        TokenType.TEMPORARY,
        TokenType.TOP,
        TokenType.TRUE,
        TokenType.UNIQUE,
        TokenType.UNPIVOT,
        TokenType.UPDATE,
        TokenType.VOLATILE,
        TokenType.WINDOW,
        *CREATABLES,
        *SUBQUERY_PREDICATES,
        *TYPE_TOKENS,
        *NO_PAREN_FUNCTIONS,
    }

    INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END}

    TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - {
        TokenType.APPLY,
        TokenType.ASOF,
        TokenType.FULL,
        TokenType.LEFT,
        TokenType.LOCK,
        TokenType.NATURAL,
        TokenType.OFFSET,
        TokenType.RIGHT,
        TokenType.WINDOW,
    }

    COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS}

    UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET}

    TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"}

    FUNC_TOKENS = {
        TokenType.COMMAND,
        TokenType.CURRENT_DATE,
        TokenType.CURRENT_DATETIME,
        TokenType.CURRENT_TIMESTAMP,
        TokenType.CURRENT_TIME,
        TokenType.CURRENT_USER,
        TokenType.FILTER,
        TokenType.FIRST,
        TokenType.FORMAT,
        TokenType.GLOB,
        TokenType.IDENTIFIER,
        TokenType.INDEX,
        TokenType.ISNULL,
        TokenType.ILIKE,
        TokenType.INSERT,
        TokenType.LIKE,
        TokenType.MERGE,
        TokenType.OFFSET,
        TokenType.PRIMARY_KEY,
        TokenType.RANGE,
        TokenType.REPLACE,
        TokenType.RLIKE,
        TokenType.ROW,
        TokenType.UNNEST,
        TokenType.VAR,
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.DATE,
        TokenType.DATETIME,
        TokenType.TABLE,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.WINDOW,
        TokenType.XOR,
        *TYPE_TOKENS,
        *SUBQUERY_PREDICATES,
    }

    CONJUNCTION = {
        TokenType.AND: exp.And,
        TokenType.OR: exp.Or,
    }

    EQUALITY = {
        TokenType.EQ: exp.EQ,
        TokenType.NEQ: exp.NEQ,
        TokenType.NULLSAFE_EQ: exp.NullSafeEQ,
    }

    COMPARISON = {
        TokenType.GT: exp.GT,
        TokenType.GTE: exp.GTE,
        TokenType.LT: exp.LT,
        TokenType.LTE: exp.LTE,
    }

    BITWISE = {
        TokenType.AMP: exp.BitwiseAnd,
        TokenType.CARET: exp.BitwiseXor,
        TokenType.PIPE: exp.BitwiseOr,
        TokenType.DPIPE: exp.DPipe,
    }

    TERM = {
        TokenType.DASH: exp.Sub,
        TokenType.PLUS: exp.Add,
        TokenType.MOD: exp.Mod,
        TokenType.COLLATE: exp.Collate,
    }

    FACTOR = {
        TokenType.DIV: exp.IntDiv,
        TokenType.LR_ARROW: exp.Distance,
        TokenType.SLASH: exp.Div,
        TokenType.STAR: exp.Mul,
    }

    TIMES = {
        TokenType.TIME,
        TokenType.TIMETZ,
    }

    TIMESTAMPS = {
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
        *TIMES,
    }

    SET_OPERATIONS = {
        TokenType.UNION,
        TokenType.INTERSECT,
        TokenType.EXCEPT,
    }

    JOIN_METHODS = {
        TokenType.NATURAL,
        TokenType.ASOF,
    }

    JOIN_SIDES = {
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.FULL,
    }

    JOIN_KINDS = {
        TokenType.INNER,
        TokenType.OUTER,
        TokenType.CROSS,
        TokenType.SEMI,
        TokenType.ANTI,
    }

    JOIN_HINTS: t.Set[str] = set()

    LAMBDAS = {
        TokenType.ARROW: lambda self, expressions: self.expression(
            exp.Lambda,
            this=self._replace_lambda(
                self._parse_conjunction(),
                {node.name for node in expressions},
            ),
            expressions=expressions,
        ),
        TokenType.FARROW: lambda self, expressions: self.expression(
            exp.Kwarg,
            this=exp.var(expressions[0].name),
            expression=self._parse_conjunction(),
        ),
    }

    COLUMN_OPERATORS = {
        TokenType.DOT: None,
        TokenType.DCOLON: lambda self, this, to: self.expression(
            exp.Cast if self.STRICT_CAST else exp.TryCast,
            this=this,
            to=to,
        ),
        TokenType.ARROW: lambda self, this, path: self.expression(
            exp.JSONExtract,
            this=this,
            expression=path,
        ),
        TokenType.DARROW: lambda self, this, path: self.expression(
            exp.JSONExtractScalar,
            this=this,
            expression=path,
        ),
        TokenType.HASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtract,
            this=this,
            expression=path,
        ),
        TokenType.DHASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtractScalar,
            this=this,
            expression=path,
        ),
        TokenType.PLACEHOLDER: lambda self, this, key: self.expression(
            exp.JSONBContains,
            this=this,
            expression=key,
        ),
    }
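
    # Hedged illustration of the operator-to-node mapping above (assumed SQL
    # snippets, shown for orientation only):
    #
    #     x::INT         -> exp.Cast (or exp.TryCast when STRICT_CAST is False)
    #     x -> '$.a'     -> exp.JSONExtract
    #     x ->> '$.a'    -> exp.JSONExtractScalar
    #     x #> '{a,b}'   -> exp.JSONBExtract
    #     x #>> '{a,b}'  -> exp.JSONBExtractScalar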

    EXPRESSION_PARSERS = {
        exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
        exp.Column: lambda self: self._parse_column(),
        exp.Condition: lambda self: self._parse_conjunction(),
        exp.DataType: lambda self: self._parse_types(allow_identifiers=False),
        exp.Expression: lambda self: self._parse_statement(),
        exp.From: lambda self: self._parse_from(),
        exp.Group: lambda self: self._parse_group(),
        exp.Having: lambda self: self._parse_having(),
        exp.Identifier: lambda self: self._parse_id_var(),
        exp.Join: lambda self: self._parse_join(),
        exp.Lambda: lambda self: self._parse_lambda(),
        exp.Lateral: lambda self: self._parse_lateral(),
        exp.Limit: lambda self: self._parse_limit(),
        exp.Offset: lambda self: self._parse_offset(),
        exp.Order: lambda self: self._parse_order(),
        exp.Ordered: lambda self: self._parse_ordered(),
        exp.Properties: lambda self: self._parse_properties(),
        exp.Qualify: lambda self: self._parse_qualify(),
        exp.Returning: lambda self: self._parse_returning(),
        exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY),
        exp.Table: lambda self: self._parse_table_parts(),
        exp.TableAlias: lambda self: self._parse_table_alias(),
        exp.Where: lambda self: self._parse_where(),
        exp.Window: lambda self: self._parse_named_window(),
        exp.With: lambda self: self._parse_with(),
        "JOIN_TYPE": lambda self: self._parse_join_parts(),
    }

    STATEMENT_PARSERS = {
        TokenType.ALTER: lambda self: self._parse_alter(),
        TokenType.BEGIN: lambda self: self._parse_transaction(),
        TokenType.CACHE: lambda self: self._parse_cache(),
        TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(),
        TokenType.COMMENT: lambda self: self._parse_comment(),
        TokenType.CREATE: lambda self: self._parse_create(),
        TokenType.DELETE: lambda self: self._parse_delete(),
        TokenType.DESC: lambda self: self._parse_describe(),
        TokenType.DESCRIBE: lambda self: self._parse_describe(),
        TokenType.DROP: lambda self: self._parse_drop(),
        TokenType.INSERT: lambda self: self._parse_insert(),
        TokenType.LOAD: lambda self: self._parse_load(),
        TokenType.MERGE: lambda self: self._parse_merge(),
        TokenType.PIVOT: lambda self: self._parse_simplified_pivot(),
        TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()),
        TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(),
        TokenType.SET: lambda self: self._parse_set(),
        TokenType.UNCACHE: lambda self: self._parse_uncache(),
        TokenType.UPDATE: lambda self: self._parse_update(),
        TokenType.USE: lambda self: self.expression(
            exp.Use,
            kind=self._match_texts(("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA"))
            and exp.var(self._prev.text),
            this=self._parse_table(schema=False),
        ),
    }

    UNARY_PARSERS = {
        TokenType.PLUS: lambda self: self._parse_unary(),  # Unary + is handled as a no-op
        TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()),
        TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()),
        TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()),
    }

    PRIMARY_PARSERS = {
        TokenType.STRING: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=True
        ),
        TokenType.NUMBER: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=False
        ),
        TokenType.STAR: lambda self, _: self.expression(
            exp.Star, **{"except": self._parse_except(), "replace": self._parse_replace()}
        ),
        TokenType.NULL: lambda self, _: self.expression(exp.Null),
        TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True),
        TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False),
        TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text),
        TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text),
        TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text),
        TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token),
        TokenType.NATIONAL_STRING: lambda self, token: self.expression(
            exp.National, this=token.text
        ),
        TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text),
        TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(),
    }

    PLACEHOLDER_PARSERS = {
        TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder),
        TokenType.PARAMETER: lambda self: self._parse_parameter(),
        TokenType.COLON: lambda self: self.expression(exp.Placeholder, this=self._prev.text)
        if self._match(TokenType.NUMBER) or self._match_set(self.ID_VAR_TOKENS)
        else None,
    }

    RANGE_PARSERS = {
        TokenType.BETWEEN: lambda self, this: self._parse_between(this),
        TokenType.GLOB: binary_range_parser(exp.Glob),
        TokenType.ILIKE: binary_range_parser(exp.ILike),
        TokenType.IN: lambda self, this: self._parse_in(this),
        TokenType.IRLIKE: binary_range_parser(exp.RegexpILike),
        TokenType.IS: lambda self, this: self._parse_is(this),
        TokenType.LIKE: binary_range_parser(exp.Like),
        TokenType.OVERLAPS: binary_range_parser(exp.Overlaps),
        TokenType.RLIKE: binary_range_parser(exp.RegexpLike),
        TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo),
        TokenType.FOR: lambda self, this: self._parse_comprehension(this),
    }
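
    # Hedged orientation note: keys in PROPERTY_PARSERS below are the raw DDL
    # keywords matched after the object name, e.g. (assumed, illustrative SQL):
    #
    #     CREATE TABLE t (x INT) ENGINE=InnoDB          -- handled by "ENGINE"
    #     CREATE TABLE t (x INT) PARTITIONED BY (x)     -- handled by "PARTITIONED BY"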

    PROPERTY_PARSERS: t.Dict[str, t.Callable] = {
        "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty),
        "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty),
        "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(),
        "CHARACTER SET": lambda self: self._parse_character_set(),
        "CHECKSUM": lambda self: self._parse_checksum(),
        "CLUSTER BY": lambda self: self._parse_cluster(),
        "CLUSTERED": lambda self: self._parse_clustered_by(),
        "COLLATE": lambda self: self._parse_property_assignment(exp.CollateProperty),
        "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty),
        "COPY": lambda self: self._parse_copy_property(),
        "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs),
        "DEFINER": lambda self: self._parse_definer(),
        "DETERMINISTIC": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "DISTKEY": lambda self: self._parse_distkey(),
        "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty),
        "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty),
        "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty),
        "EXTERNAL": lambda self: self.expression(exp.ExternalProperty),
        "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs),
        "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "FREESPACE": lambda self: self._parse_freespace(),
        "HEAP": lambda self: self.expression(exp.HeapProperty),
        "IMMUTABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs),
        "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty),
        "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"),
        "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"),
        "LIKE": lambda self: self._parse_create_like(),
        "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty),
        "LOCK": lambda self: self._parse_locking(),
        "LOCKING": lambda self: self._parse_locking(),
        "LOG": lambda self, **kwargs: self._parse_log(**kwargs),
        "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty),
        "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs),
        "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True),
        "NO": lambda self: self._parse_no_property(),
        "ON": lambda self: self._parse_on_property(),
        "ORDER BY": lambda self: self._parse_order(skip_order_token=True),
        "PARTITION BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED_BY": lambda self: self._parse_partitioned_by(),
        "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True),
        "RANGE": lambda self: self._parse_dict_range(this="RANGE"),
        "RETURNS": lambda self: self._parse_returns(),
        "ROW": lambda self: self._parse_row(),
        "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty),
        "SET": lambda self: self.expression(exp.SetProperty, multi=False),
        "SETTINGS": lambda self: self.expression(
            exp.SettingsProperty, expressions=self._parse_csv(self._parse_set_item)
        ),
        "SORTKEY": lambda self: self._parse_sortkey(),
        "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"),
        "STABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("STABLE")
        ),
        "STORED": lambda self: self._parse_stored(),
        "TBLPROPERTIES": lambda self: self._parse_wrapped_csv(self._parse_property),
        "TEMP": lambda self: self.expression(exp.TemporaryProperty),
        "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty),
        "TO": lambda self: self._parse_to_table(),
        "TRANSIENT": lambda self: self.expression(exp.TransientProperty),
        "TTL": lambda self: self._parse_ttl(),
        "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "VOLATILE": lambda self: self._parse_volatile_property(),
        "WITH": lambda self: self._parse_with_property(),
    }

    CONSTRAINT_PARSERS = {
        "AUTOINCREMENT": lambda self: self._parse_auto_increment(),
        "AUTO_INCREMENT": lambda self: self._parse_auto_increment(),
        "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False),
        "CHARACTER SET": lambda self: self.expression(
            exp.CharacterSetColumnConstraint, this=self._parse_var_or_string()
        ),
        "CHECK": lambda self: self.expression(
            exp.CheckColumnConstraint, this=self._parse_wrapped(self._parse_conjunction)
        ),
        "COLLATE": lambda self: self.expression(
            exp.CollateColumnConstraint, this=self._parse_var()
        ),
        "COMMENT": lambda self: self.expression(
            exp.CommentColumnConstraint, this=self._parse_string()
        ),
        "COMPRESS": lambda self: self._parse_compress(),
        "CLUSTERED": lambda self: self.expression(
            exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered)
        ),
        "NONCLUSTERED": lambda self: self.expression(
            exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered)
        ),
        "DEFAULT": lambda self: self.expression(
            exp.DefaultColumnConstraint, this=self._parse_bitwise()
        ),
        "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()),
        "FOREIGN KEY": lambda self: self._parse_foreign_key(),
        "FORMAT": lambda self: self.expression(
            exp.DateFormatColumnConstraint, this=self._parse_var_or_string()
        ),
        "GENERATED": lambda self: self._parse_generated_as_identity(),
        "IDENTITY": lambda self: self._parse_auto_increment(),
        "INLINE": lambda self: self._parse_inline(),
        "LIKE": lambda self: self._parse_create_like(),
        "NOT": lambda self: self._parse_not_constraint(),
        "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True),
        "ON": lambda self: (
            self._match(TokenType.UPDATE)
            and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function())
        )
        or self.expression(exp.OnProperty, this=self._parse_id_var()),
        "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()),
        "PRIMARY KEY": lambda self: self._parse_primary_key(),
        "REFERENCES": lambda self: self._parse_references(match=False),
        "TITLE": lambda self: self.expression(
            exp.TitleColumnConstraint, this=self._parse_var_or_string()
        ),
        "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]),
        "UNIQUE": lambda self: self._parse_unique(),
        "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint),
        "WITH": lambda self: self.expression(
            exp.Properties, expressions=self._parse_wrapped_csv(self._parse_property)
        ),
    }

    ALTER_PARSERS = {
        "ADD": lambda self: self._parse_alter_table_add(),
        "ALTER": lambda self: self._parse_alter_table_alter(),
        "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()),
        "DROP": lambda self: self._parse_alter_table_drop(),
        "RENAME": lambda self: self._parse_alter_table_rename(),
    }

    SCHEMA_UNNAMED_CONSTRAINTS = {"CHECK", "FOREIGN KEY", "LIKE", "PRIMARY KEY", "UNIQUE"}

    NO_PAREN_FUNCTION_PARSERS = {
        "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()),
        "CASE": lambda self: self._parse_case(),
        "IF": lambda self: self._parse_if(),
        "NEXT": lambda self: self._parse_next_value_for(),
    }

    INVALID_FUNC_NAME_TOKENS = {
        TokenType.IDENTIFIER,
        TokenType.STRING,
    }

    FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"}

    FUNCTION_PARSERS = {
        "ANY_VALUE": lambda self: self._parse_any_value(),
        "CAST": lambda self: self._parse_cast(self.STRICT_CAST),
        "CONCAT": lambda self: self._parse_concat(),
        "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST),
        "DECODE": lambda self: self._parse_decode(),
        "EXTRACT": lambda self: self._parse_extract(),
        "JSON_OBJECT": lambda self: self._parse_json_object(),
        "LOG": lambda self: self._parse_logarithm(),
        "MATCH": lambda self: self._parse_match_against(),
        "OPENJSON": lambda self: self._parse_open_json(),
        "POSITION": lambda self: self._parse_position(),
        "SAFE_CAST": lambda self: self._parse_cast(False),
        "STRING_AGG": lambda self: self._parse_string_agg(),
        "SUBSTRING": lambda self: self._parse_substring(),
        "TRIM": lambda self: self._parse_trim(),
        "TRY_CAST": lambda self: self._parse_cast(False),
        "TRY_CONVERT": lambda self: self._parse_convert(False),
    }

    QUERY_MODIFIER_PARSERS = {
        TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()),
        TokenType.WHERE: lambda self: ("where", self._parse_where()),
        TokenType.GROUP_BY: lambda self: ("group", self._parse_group()),
        TokenType.HAVING: lambda self: ("having", self._parse_having()),
        TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()),
        TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()),
        TokenType.ORDER_BY: lambda self: ("order", self._parse_order()),
        TokenType.LIMIT: lambda self: ("limit", self._parse_limit()),
        TokenType.FETCH: lambda self: ("limit", self._parse_limit()),
        TokenType.OFFSET: lambda self: ("offset", self._parse_offset()),
        TokenType.FOR: lambda self: ("locks", self._parse_locks()),
        TokenType.LOCK: lambda self: ("locks", self._parse_locks()),
        TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)),
lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 787 TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 788 TokenType.CLUSTER_BY: lambda self: ( 789 "cluster", 790 self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 791 ), 792 TokenType.DISTRIBUTE_BY: lambda self: ( 793 "distribute", 794 self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY), 795 ), 796 TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)), 797 TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)), 798 TokenType.START_WITH: lambda self: ("connect", self._parse_connect()), 799 } 800 801 SET_PARSERS = { 802 "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"), 803 "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"), 804 "SESSION": lambda self: self._parse_set_item_assignment("SESSION"), 805 "TRANSACTION": lambda self: self._parse_set_transaction(), 806 } 807 808 SHOW_PARSERS: t.Dict[str, t.Callable] = {} 809 810 TYPE_LITERAL_PARSERS: t.Dict[exp.DataType.Type, t.Callable] = {} 811 812 MODIFIABLES = (exp.Subquery, exp.Subqueryable, exp.Table) 813 814 DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN} 815 816 PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE} 817 818 TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"} 819 TRANSACTION_CHARACTERISTICS = { 820 "ISOLATION LEVEL REPEATABLE READ", 821 "ISOLATION LEVEL READ COMMITTED", 822 "ISOLATION LEVEL READ UNCOMMITTED", 823 "ISOLATION LEVEL SERIALIZABLE", 824 "READ WRITE", 825 "READ ONLY", 826 } 827 828 INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"} 829 830 CLONE_KINDS = {"TIMESTAMP", "OFFSET", "STATEMENT"} 831 832 TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE} 833 834 WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS} 835 WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER} 836 WINDOW_SIDES = {"FOLLOWING", "PRECEDING"} 837 838 ADD_CONSTRAINT_TOKENS = {TokenType.CONSTRAINT, TokenType.PRIMARY_KEY, TokenType.FOREIGN_KEY} 839 840 DISTINCT_TOKENS = {TokenType.DISTINCT} 841 842 STRICT_CAST = True 843 844 # A NULL arg in CONCAT yields NULL by default 845 CONCAT_NULL_OUTPUTS_STRING = False 846 847 PREFIXED_PIVOT_COLUMNS = False 848 IDENTIFY_PIVOT_STRINGS = False 849 850 LOG_BASE_FIRST = True 851 LOG_DEFAULTS_TO_LN = False 852 853 SUPPORTS_USER_DEFINED_TYPES = True 854 855 __slots__ = ( 856 "error_level", 857 "error_message_context", 858 "max_errors", 859 "sql", 860 "errors", 861 "_tokens", 862 "_index", 863 "_curr", 864 "_next", 865 "_prev", 866 "_prev_comments", 867 "_tokenizer", 868 ) 869 870 # Autofilled 871 TOKENIZER_CLASS: t.Type[Tokenizer] = Tokenizer 872 INDEX_OFFSET: int = 0 873 UNNEST_COLUMN_ONLY: bool = False 874 ALIAS_POST_TABLESAMPLE: bool = False 875 STRICT_STRING_CONCAT = False 876 NORMALIZE_FUNCTIONS = "upper" 877 NULL_ORDERING: str = "nulls_are_small" 878 SHOW_TRIE: t.Dict = {} 879 SET_TRIE: t.Dict = {} 880 FORMAT_MAPPING: t.Dict[str, str] = {} 881 FORMAT_TRIE: t.Dict = {} 882 TIME_MAPPING: t.Dict[str, str] = {} 883 TIME_TRIE: t.Dict = {} 884 885 def __init__( 886 self, 887 error_level: t.Optional[ErrorLevel] = None, 888 error_message_context: int = 100, 889 max_errors: int = 3, 890 ): 891 self.error_level = error_level or ErrorLevel.IMMEDIATE 892 self.error_message_context = error_message_context 893 self.max_errors = max_errors 894 self._tokenizer = self.TOKENIZER_CLASS() 895 self.reset() 896 897 def 

    def reset(self):
        self.sql = ""
        self.errors = []
        self._tokens = []
        self._index = 0
        self._curr = None
        self._next = None
        self._prev = None
        self._prev_comments = None

    def parse(
        self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens and returns a list of syntax trees, one tree
        per parsed SQL statement.

        Args:
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The list of the produced syntax trees.
        """
        return self._parse(
            parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
        )

    def parse_into(
        self,
        expression_types: exp.IntoType,
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens into a given Expression type. If a collection of Expression
        types is given instead, this method will try to parse the token list into each one
        of them, stopping at the first for which the parsing succeeds.

        Args:
            expression_types: The expression type(s) to try and parse the token list into.
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The target Expression.
        """
        errors = []
        for expression_type in ensure_list(expression_types):
            parser = self.EXPRESSION_PARSERS.get(expression_type)
            if not parser:
                raise TypeError(f"No parser registered for {expression_type}")

            try:
                return self._parse(parser, raw_tokens, sql)
            except ParseError as e:
                e.errors[0]["into_expression"] = expression_type
                errors.append(e)

        raise ParseError(
            f"Failed to parse '{sql or raw_tokens}' into {expression_types}",
            errors=merge_errors(errors),
        ) from errors[-1]

    def _parse(
        self,
        parse_method: t.Callable[[Parser], t.Optional[exp.Expression]],
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        self.reset()
        self.sql = sql or ""

        total = len(raw_tokens)
        chunks: t.List[t.List[Token]] = [[]]

        for i, token in enumerate(raw_tokens):
            if token.token_type == TokenType.SEMICOLON:
                if i < total - 1:
                    chunks.append([])
            else:
                chunks[-1].append(token)

        expressions = []

        for tokens in chunks:
            self._index = -1
            self._tokens = tokens
            self._advance()

            expressions.append(parse_method(self))

            if self._index < len(self._tokens):
                self.raise_error("Invalid expression / Unexpected token")

            self.check_errors()

        return expressions

    def check_errors(self) -> None:
        """Logs or raises any found errors, depending on the chosen error level setting."""
        if self.error_level == ErrorLevel.WARN:
            for error in self.errors:
                logger.error(str(error))
        elif self.error_level == ErrorLevel.RAISE and self.errors:
            raise ParseError(
                concat_messages(self.errors, self.max_errors),
                errors=merge_errors(self.errors),
            )
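
    # Hedged summary of how the error levels interact (assumed behavior note,
    # derived from check_errors and raise_error below): ErrorLevel.IMMEDIATE
    # raises on the first problem, ErrorLevel.RAISE collects problems and
    # raises them together in check_errors, and ErrorLevel.WARN only logs them.
    #
    #     >>> from sqlglot.errors import ErrorLevel
    #     >>> parser = Parser(error_level=ErrorLevel.WARN)  # log, don't raise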

    def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
        """
        Appends an error in the list of recorded errors or raises it, depending on the chosen
        error level setting.
        """
        token = token or self._curr or self._prev or Token.string("")
        start = token.start
        end = token.end + 1
        start_context = self.sql[max(start - self.error_message_context, 0) : start]
        highlight = self.sql[start:end]
        end_context = self.sql[end : end + self.error_message_context]

        error = ParseError.new(
            f"{message}. Line {token.line}, Col: {token.col}.\n"
            f"  {start_context}\033[4m{highlight}\033[0m{end_context}",
            description=message,
            line=token.line,
            col=token.col,
            start_context=start_context,
            highlight=highlight,
            end_context=end_context,
        )

        if self.error_level == ErrorLevel.IMMEDIATE:
            raise error

        self.errors.append(error)

    def expression(
        self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs
    ) -> E:
        """
        Creates a new, validated Expression.

        Args:
            exp_class: The expression class to instantiate.
            comments: An optional list of comments to attach to the expression.
            kwargs: The arguments to set for the expression along with their respective values.

        Returns:
            The target expression.
        """
        instance = exp_class(**kwargs)
        instance.add_comments(comments) if comments else self._add_comments(instance)
        return self.validate_expression(instance)

    def _add_comments(self, expression: t.Optional[exp.Expression]) -> None:
        if expression and self._prev_comments:
            expression.add_comments(self._prev_comments)
            self._prev_comments = None

    def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E:
        """
        Validates an Expression, making sure that all its mandatory arguments are set.

        Args:
            expression: The expression to validate.
            args: An optional list of items that was used to instantiate the expression, if it's a Func.

        Returns:
            The validated expression.
        """
        if self.error_level != ErrorLevel.IGNORE:
            for error_message in expression.error_messages(args):
                self.raise_error(error_message)

        return expression

    def _find_sql(self, start: Token, end: Token) -> str:
        return self.sql[start.start : end.end + 1]

    def _advance(self, times: int = 1) -> None:
        self._index += times
        self._curr = seq_get(self._tokens, self._index)
        self._next = seq_get(self._tokens, self._index + 1)

        if self._index > 0:
            self._prev = self._tokens[self._index - 1]
            self._prev_comments = self._prev.comments
        else:
            self._prev = None
            self._prev_comments = None

    def _retreat(self, index: int) -> None:
        if index != self._index:
            self._advance(index - self._index)

    def _parse_command(self) -> exp.Command:
        return self.expression(exp.Command, this=self._prev.text, expression=self._parse_string())

    def _parse_comment(self, allow_exists: bool = True) -> exp.Expression:
        start = self._prev
        exists = self._parse_exists() if allow_exists else None

        self._match(TokenType.ON)

        kind = self._match_set(self.CREATABLES) and self._prev
        if not kind:
            return self._parse_as_command(start)

        if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=kind.token_type)
        elif kind.token_type == TokenType.TABLE:
            this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS)
        elif kind.token_type == TokenType.COLUMN:
            this = self._parse_column()
        else:
            this = self._parse_id_var()

        self._match(TokenType.IS)

        return self.expression(
            exp.Comment, this=this, kind=kind.text, expression=self._parse_string(), exists=exists
        )

    def _parse_to_table(
        self,
    ) -> exp.ToTableProperty:
        table = self._parse_table_parts(schema=True)
        return self.expression(exp.ToTableProperty, this=table)

    # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl
    def _parse_ttl(self) -> exp.Expression:
        def _parse_ttl_action() -> t.Optional[exp.Expression]:
            this = self._parse_bitwise()

            if self._match_text_seq("DELETE"):
                return self.expression(exp.MergeTreeTTLAction, this=this, delete=True)
            if self._match_text_seq("RECOMPRESS"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise()
                )
            if self._match_text_seq("TO", "DISK"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string()
                )
            if self._match_text_seq("TO", "VOLUME"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string()
                )

            return this

        expressions = self._parse_csv(_parse_ttl_action)
        where = self._parse_where()
        group = self._parse_group()

        aggregates = None
        if group and self._match(TokenType.SET):
            aggregates = self._parse_csv(self._parse_set_item)

        return self.expression(
            exp.MergeTreeTTL,
            expressions=expressions,
            where=where,
            group=group,
            aggregates=aggregates,
        )
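
    # For orientation, a hedged example of the ClickHouse TTL clause the method
    # above targets (illustrative SQL in the shape described by the linked docs):
    #
    #     TTL d + INTERVAL 1 MONTH DELETE,
    #         d + INTERVAL 1 WEEK TO VOLUME 'aaa',
    #         d + INTERVAL 2 WEEK TO DISK 'bbb'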

    def _parse_statement(self) -> t.Optional[exp.Expression]:
        if self._curr is None:
            return None

        if self._match_set(self.STATEMENT_PARSERS):
            return self.STATEMENT_PARSERS[self._prev.token_type](self)

        if self._match_set(Tokenizer.COMMANDS):
            return self._parse_command()

        expression = self._parse_expression()
        expression = self._parse_set_operations(expression) if expression else self._parse_select()
        return self._parse_query_modifiers(expression)

    def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command:
        start = self._prev
        temporary = self._match(TokenType.TEMPORARY)
        materialized = self._match_text_seq("MATERIALIZED")

        kind = self._match_set(self.CREATABLES) and self._prev.text
        if not kind:
            return self._parse_as_command(start)

        return self.expression(
            exp.Drop,
            comments=start.comments,
            exists=exists or self._parse_exists(),
            this=self._parse_table(schema=True),
            kind=kind,
            temporary=temporary,
            materialized=materialized,
            cascade=self._match_text_seq("CASCADE"),
            constraints=self._match_text_seq("CONSTRAINTS"),
            purge=self._match_text_seq("PURGE"),
        )

    def _parse_exists(self, not_: bool = False) -> t.Optional[bool]:
        return (
            self._match_text_seq("IF")
            and (not not_ or self._match(TokenType.NOT))
            and self._match(TokenType.EXISTS)
        )

    def _parse_create(self) -> exp.Create | exp.Command:
        # Note: this can't be None because we've matched a statement parser
        start = self._prev
        comments = self._prev_comments

        replace = start.text.upper() == "REPLACE" or self._match_pair(
            TokenType.OR, TokenType.REPLACE
        )
        unique = self._match(TokenType.UNIQUE)

        if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False):
            self._advance()

        properties = None
        create_token = self._match_set(self.CREATABLES) and self._prev

        if not create_token:
            # exp.Properties.Location.POST_CREATE
            properties = self._parse_properties()
            create_token = self._match_set(self.CREATABLES) and self._prev

        if not properties or not create_token:
            return self._parse_as_command(start)

        exists = self._parse_exists(not_=True)
        this = None
        expression: t.Optional[exp.Expression] = None
        indexes = None
        no_schema_binding = None
        begin = None
        clone = None

        def extend_props(temp_props: t.Optional[exp.Properties]) -> None:
            nonlocal properties
            if properties and temp_props:
                properties.expressions.extend(temp_props.expressions)
            elif temp_props:
                properties = temp_props

        if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=create_token.token_type)

            # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature)
            extend_props(self._parse_properties())

            self._match(TokenType.ALIAS)

            if self._match(TokenType.COMMAND):
                expression = self._parse_as_command(self._prev)
            else:
                begin = self._match(TokenType.BEGIN)
                return_ = self._match_text_seq("RETURN")
                expression = self._parse_statement()

                if return_:
                    expression = self.expression(exp.Return, this=expression)
        elif create_token.token_type == TokenType.INDEX:
            this = self._parse_index(index=self._parse_id_var())
        elif create_token.token_type in self.DB_CREATABLES:
            table_parts = self._parse_table_parts(schema=True)

            # exp.Properties.Location.POST_NAME
            self._match(TokenType.COMMA)
            extend_props(self._parse_properties(before=True))

            this = self._parse_schema(this=table_parts)

            # exp.Properties.Location.POST_SCHEMA and POST_WITH
            extend_props(self._parse_properties())

            self._match(TokenType.ALIAS)
            if not self._match_set(self.DDL_SELECT_TOKENS, advance=False):
                # exp.Properties.Location.POST_ALIAS
                extend_props(self._parse_properties())

            expression = self._parse_ddl_select()

            if create_token.token_type == TokenType.TABLE:
                # exp.Properties.Location.POST_EXPRESSION
                extend_props(self._parse_properties())

                indexes = []
                while True:
                    index = self._parse_index()

                    # exp.Properties.Location.POST_INDEX
                    extend_props(self._parse_properties())

                    if not index:
                        break
                    else:
                        self._match(TokenType.COMMA)
                        indexes.append(index)
            elif create_token.token_type == TokenType.VIEW:
                if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"):
                    no_schema_binding = True

        shallow = self._match_text_seq("SHALLOW")

        if self._match_text_seq("CLONE"):
            clone = self._parse_table(schema=True)
            when = self._match_texts({"AT", "BEFORE"}) and self._prev.text.upper()
            clone_kind = (
                self._match(TokenType.L_PAREN)
                and self._match_texts(self.CLONE_KINDS)
                and self._prev.text.upper()
            )
            clone_expression = self._match(TokenType.FARROW) and self._parse_bitwise()
            self._match(TokenType.R_PAREN)
            clone = self.expression(
                exp.Clone,
                this=clone,
                when=when,
                kind=clone_kind,
                shallow=shallow,
                expression=clone_expression,
            )

        return self.expression(
            exp.Create,
            comments=comments,
            this=this,
            kind=create_token.text,
            replace=replace,
            unique=unique,
            expression=expression,
            exists=exists,
            properties=properties,
            indexes=indexes,
            no_schema_binding=no_schema_binding,
            begin=begin,
            clone=clone,
        )

    def _parse_property_before(self) -> t.Optional[exp.Expression]:
        # only used for teradata currently
        self._match(TokenType.COMMA)

        kwargs = {
            "no": self._match_text_seq("NO"),
            "dual": self._match_text_seq("DUAL"),
            "before": self._match_text_seq("BEFORE"),
            "default": self._match_text_seq("DEFAULT"),
            "local": (self._match_text_seq("LOCAL") and "LOCAL")
            or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"),
            "after": self._match_text_seq("AFTER"),
            "minimum": self._match_texts(("MIN", "MINIMUM")),
            "maximum": self._match_texts(("MAX", "MAXIMUM")),
        }

        if self._match_texts(self.PROPERTY_PARSERS):
            parser = self.PROPERTY_PARSERS[self._prev.text.upper()]
            try:
                return parser(self, **{k: v for k, v in kwargs.items() if v})
            except TypeError:
                self.raise_error(f"Cannot parse property '{self._prev.text}'")

        return None

    def _parse_property(self) -> t.Optional[exp.Expression]:
        if self._match_texts(self.PROPERTY_PARSERS):
            return self.PROPERTY_PARSERS[self._prev.text.upper()](self)

        if self._match_pair(TokenType.DEFAULT, TokenType.CHARACTER_SET):
            return self._parse_character_set(default=True)

        if self._match_text_seq("COMPOUND", "SORTKEY"):
            return self._parse_sortkey(compound=True)

        if self._match_text_seq("SQL", "SECURITY"):
            return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER"))

        assignment = self._match_pair(
            TokenType.VAR, TokenType.EQ, advance=False
        ) or self._match_pair(TokenType.STRING, TokenType.EQ, advance=False)

        if assignment:
            key = self._parse_var_or_string()
            self._match(TokenType.EQ)
            return self.expression(
                exp.Property,
                this=key,
                value=self._parse_column() or self._parse_var(any_token=True),
            )

        return None

    def _parse_stored(self) -> exp.FileFormatProperty:
        self._match(TokenType.ALIAS)

        input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None
        output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None

        return self.expression(
            exp.FileFormatProperty,
            this=self.expression(
                exp.InputOutputFormat, input_format=input_format, output_format=output_format
            )
            if input_format or output_format
            else self._parse_var_or_string() or self._parse_number() or self._parse_id_var(),
        )

    def _parse_property_assignment(self, exp_class: t.Type[E]) -> E:
        self._match(TokenType.EQ)
        self._match(TokenType.ALIAS)
        return self.expression(exp_class, this=self._parse_field())

    def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]:
        properties = []
        while True:
            if before:
                prop = self._parse_property_before()
            else:
                prop = self._parse_property()

            if not prop:
                break
            for p in ensure_list(prop):
                properties.append(p)

        if properties:
            return self.expression(exp.Properties, expressions=properties)

        return None

    def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty:
        return self.expression(
            exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION")
        )

    def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty:
        if self._index >= 2:
            pre_volatile_token = self._tokens[self._index - 2]
        else:
            pre_volatile_token = None

        if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS:
            return exp.VolatileProperty()

        return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE"))

    def _parse_with_property(
        self,
    ) -> t.Optional[exp.Expression] | t.List[exp.Expression]:
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_property)

        if self._match_text_seq("JOURNAL"):
            return self._parse_withjournaltable()

        if self._match_text_seq("DATA"):
            return self._parse_withdata(no=False)
        elif self._match_text_seq("NO", "DATA"):
            return self._parse_withdata(no=True)

        if not self._next:
            return None

        return self._parse_withisolatedloading()

    # https://dev.mysql.com/doc/refman/8.0/en/create-view.html
    def _parse_definer(self) -> t.Optional[exp.DefinerProperty]:
        self._match(TokenType.EQ)

        user = self._parse_id_var()
        self._match(TokenType.PARAMETER)
        host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text)

        if not user or not host:
            return None

        return exp.DefinerProperty(this=f"{user}@{host}")

    def _parse_withjournaltable(self) -> exp.WithJournalTableProperty:
        self._match(TokenType.TABLE)
        self._match(TokenType.EQ)
        return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts())

    def _parse_log(self, no: bool = False) -> exp.LogProperty:
        return self.expression(exp.LogProperty, no=no)

    def _parse_journal(self, **kwargs) -> exp.JournalProperty:
        return self.expression(exp.JournalProperty, **kwargs)

    def _parse_checksum(self) -> exp.ChecksumProperty:
        self._match(TokenType.EQ)

        on = None
        if self._match(TokenType.ON):
            on = True
        elif self._match_text_seq("OFF"):
            on = False

        return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT))

    def _parse_cluster(self) -> exp.Cluster:
        return self.expression(exp.Cluster, expressions=self._parse_csv(self._parse_ordered))

    def _parse_clustered_by(self) -> exp.ClusteredByProperty:
        self._match_text_seq("BY")

        self._match_l_paren()
        expressions = self._parse_csv(self._parse_column)
        self._match_r_paren()

        if self._match_text_seq("SORTED", "BY"):
            self._match_l_paren()
            sorted_by = self._parse_csv(self._parse_ordered)
            self._match_r_paren()
        else:
            sorted_by = None

        self._match(TokenType.INTO)
        buckets = self._parse_number()
        self._match_text_seq("BUCKETS")

        return self.expression(
            exp.ClusteredByProperty,
            expressions=expressions,
            sorted_by=sorted_by,
            buckets=buckets,
        )

    def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]:
        if not self._match_text_seq("GRANTS"):
            self._retreat(self._index - 1)
            return None

        return self.expression(exp.CopyGrantsProperty)

    def _parse_freespace(self) -> exp.FreespaceProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT)
        )

    def _parse_mergeblockratio(
        self, no: bool = False, default: bool = False
    ) -> exp.MergeBlockRatioProperty:
        if self._match(TokenType.EQ):
            return self.expression(
                exp.MergeBlockRatioProperty,
                this=self._parse_number(),
                percent=self._match(TokenType.PERCENT),
            )

        return self.expression(exp.MergeBlockRatioProperty, no=no, default=default)

    def _parse_datablocksize(
        self,
        default: t.Optional[bool] = None,
        minimum: t.Optional[bool] = None,
        maximum: t.Optional[bool] = None,
    ) -> exp.DataBlocksizeProperty:
        self._match(TokenType.EQ)
        size = self._parse_number()

        units = None
        if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")):
            units = self._prev.text

        return self.expression(
            exp.DataBlocksizeProperty,
            size=size,
            units=units,
            default=default,
            minimum=minimum,
            maximum=maximum,
        )

    def _parse_blockcompression(self) -> exp.BlockCompressionProperty:
        self._match(TokenType.EQ)
        always = self._match_text_seq("ALWAYS")
        manual = self._match_text_seq("MANUAL")
        never = self._match_text_seq("NEVER")
        default = self._match_text_seq("DEFAULT")

        autotemp = None
        if self._match_text_seq("AUTOTEMP"):
            autotemp = self._parse_schema()

        return self.expression(
            exp.BlockCompressionProperty,
            always=always,
            manual=manual,
            never=never,
            default=default,
            autotemp=autotemp,
        )

    def _parse_withisolatedloading(self) -> exp.IsolatedLoadingProperty:
        no = self._match_text_seq("NO")
        concurrent = self._match_text_seq("CONCURRENT")
        self._match_text_seq("ISOLATED", "LOADING")
        for_all = self._match_text_seq("FOR", "ALL")
        for_insert = self._match_text_seq("FOR", "INSERT")
        for_none = self._match_text_seq("FOR", "NONE")
self._match_text_seq("FOR", "NONE") 1606 return self.expression( 1607 exp.IsolatedLoadingProperty, 1608 no=no, 1609 concurrent=concurrent, 1610 for_all=for_all, 1611 for_insert=for_insert, 1612 for_none=for_none, 1613 ) 1614 1615 def _parse_locking(self) -> exp.LockingProperty: 1616 if self._match(TokenType.TABLE): 1617 kind = "TABLE" 1618 elif self._match(TokenType.VIEW): 1619 kind = "VIEW" 1620 elif self._match(TokenType.ROW): 1621 kind = "ROW" 1622 elif self._match_text_seq("DATABASE"): 1623 kind = "DATABASE" 1624 else: 1625 kind = None 1626 1627 if kind in ("DATABASE", "TABLE", "VIEW"): 1628 this = self._parse_table_parts() 1629 else: 1630 this = None 1631 1632 if self._match(TokenType.FOR): 1633 for_or_in = "FOR" 1634 elif self._match(TokenType.IN): 1635 for_or_in = "IN" 1636 else: 1637 for_or_in = None 1638 1639 if self._match_text_seq("ACCESS"): 1640 lock_type = "ACCESS" 1641 elif self._match_texts(("EXCL", "EXCLUSIVE")): 1642 lock_type = "EXCLUSIVE" 1643 elif self._match_text_seq("SHARE"): 1644 lock_type = "SHARE" 1645 elif self._match_text_seq("READ"): 1646 lock_type = "READ" 1647 elif self._match_text_seq("WRITE"): 1648 lock_type = "WRITE" 1649 elif self._match_text_seq("CHECKSUM"): 1650 lock_type = "CHECKSUM" 1651 else: 1652 lock_type = None 1653 1654 override = self._match_text_seq("OVERRIDE") 1655 1656 return self.expression( 1657 exp.LockingProperty, 1658 this=this, 1659 kind=kind, 1660 for_or_in=for_or_in, 1661 lock_type=lock_type, 1662 override=override, 1663 ) 1664 1665 def _parse_partition_by(self) -> t.List[exp.Expression]: 1666 if self._match(TokenType.PARTITION_BY): 1667 return self._parse_csv(self._parse_conjunction) 1668 return [] 1669 1670 def _parse_partitioned_by(self) -> exp.PartitionedByProperty: 1671 self._match(TokenType.EQ) 1672 return self.expression( 1673 exp.PartitionedByProperty, 1674 this=self._parse_schema() or self._parse_bracket(self._parse_field()), 1675 ) 1676 1677 def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty: 1678 if self._match_text_seq("AND", "STATISTICS"): 1679 statistics = True 1680 elif self._match_text_seq("AND", "NO", "STATISTICS"): 1681 statistics = False 1682 else: 1683 statistics = None 1684 1685 return self.expression(exp.WithDataProperty, no=no, statistics=statistics) 1686 1687 def _parse_no_property(self) -> t.Optional[exp.NoPrimaryIndexProperty]: 1688 if self._match_text_seq("PRIMARY", "INDEX"): 1689 return exp.NoPrimaryIndexProperty() 1690 return None 1691 1692 def _parse_on_property(self) -> t.Optional[exp.Expression]: 1693 if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"): 1694 return exp.OnCommitProperty() 1695 if self._match_text_seq("COMMIT", "DELETE", "ROWS"): 1696 return exp.OnCommitProperty(delete=True) 1697 return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var())) 1698 1699 def _parse_distkey(self) -> exp.DistKeyProperty: 1700 return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var)) 1701 1702 def _parse_create_like(self) -> t.Optional[exp.LikeProperty]: 1703 table = self._parse_table(schema=True) 1704 1705 options = [] 1706 while self._match_texts(("INCLUDING", "EXCLUDING")): 1707 this = self._prev.text.upper() 1708 1709 id_var = self._parse_id_var() 1710 if not id_var: 1711 return None 1712 1713 options.append( 1714 self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper())) 1715 ) 1716 1717 return self.expression(exp.LikeProperty, this=table, expressions=options) 1718 1719 def _parse_sortkey(self, compound: bool = 

    def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty:
        return self.expression(
            exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound
        )

    def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default
        )

    def _parse_returns(self) -> exp.ReturnsProperty:
        value: t.Optional[exp.Expression]
        is_table = self._match(TokenType.TABLE)

        if is_table:
            if self._match(TokenType.LT):
                value = self.expression(
                    exp.Schema,
                    this="TABLE",
                    expressions=self._parse_csv(self._parse_struct_types),
                )
                if not self._match(TokenType.GT):
                    self.raise_error("Expecting >")
            else:
                value = self._parse_schema(exp.var("TABLE"))
        else:
            value = self._parse_types()

        return self.expression(exp.ReturnsProperty, this=value, is_table=is_table)

    def _parse_describe(self) -> exp.Describe:
        kind = self._match_set(self.CREATABLES) and self._prev.text
        this = self._parse_table()
        return self.expression(exp.Describe, this=this, kind=kind)

    def _parse_insert(self) -> exp.Insert:
        comments = ensure_list(self._prev_comments)
        overwrite = self._match(TokenType.OVERWRITE)
        ignore = self._match(TokenType.IGNORE)
        local = self._match_text_seq("LOCAL")
        alternative = None

        if self._match_text_seq("DIRECTORY"):
            this: t.Optional[exp.Expression] = self.expression(
                exp.Directory,
                this=self._parse_var_or_string(),
                local=local,
                row_format=self._parse_row_format(match_row=True),
            )
        else:
            if self._match(TokenType.OR):
                alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text

            self._match(TokenType.INTO)
            comments += ensure_list(self._prev_comments)
            self._match(TokenType.TABLE)
            this = self._parse_table(schema=True)

        returning = self._parse_returning()

        return self.expression(
            exp.Insert,
            comments=comments,
            this=this,
            by_name=self._match_text_seq("BY", "NAME"),
            exists=self._parse_exists(),
            partition=self._parse_partition(),
            where=self._match_pair(TokenType.REPLACE, TokenType.WHERE)
            and self._parse_conjunction(),
            expression=self._parse_ddl_select(),
            conflict=self._parse_on_conflict(),
            returning=returning or self._parse_returning(),
            overwrite=overwrite,
            alternative=alternative,
            ignore=ignore,
        )

    def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]:
        conflict = self._match_text_seq("ON", "CONFLICT")
        duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY")

        if not conflict and not duplicate:
            return None

        nothing = None
        expressions = None
        key = None
        constraint = None

        if conflict:
            if self._match_text_seq("ON", "CONSTRAINT"):
                constraint = self._parse_id_var()
            else:
                key = self._parse_csv(self._parse_value)

        self._match_text_seq("DO")
        if self._match_text_seq("NOTHING"):
            nothing = True
        else:
            self._match(TokenType.UPDATE)
            self._match(TokenType.SET)
            expressions = self._parse_csv(self._parse_equality)

        return self.expression(
            exp.OnConflict,
            duplicate=duplicate,
            expressions=expressions,
            nothing=nothing,
            key=key,
            constraint=constraint,
        )
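
    # Illustrative sketch (editor's note, not original source): assuming the
    # package is importable, the ON DUPLICATE KEY branch is exercised by e.g.
    #
    #     import sqlglot
    #     ast = sqlglot.parse_one(
    #         "INSERT INTO t (a) VALUES (1) ON DUPLICATE KEY UPDATE a = 1", read="mysql"
    #     )
    #     conflict = ast.args["conflict"]  # roughly exp.OnConflict(duplicate=True, ...)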

    def _parse_returning(self) -> t.Optional[exp.Returning]:
        if not self._match(TokenType.RETURNING):
            return None
        return self.expression(
            exp.Returning,
            expressions=self._parse_csv(self._parse_expression),
            into=self._match(TokenType.INTO) and self._parse_table_part(),
        )

    def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]:
        if not self._match(TokenType.FORMAT):
            return None
        return self._parse_row_format()

    def _parse_row_format(
        self, match_row: bool = False
    ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]:
        if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT):
            return None

        if self._match_text_seq("SERDE"):
            this = self._parse_string()

            serde_properties = None
            if self._match(TokenType.SERDE_PROPERTIES):
                serde_properties = self.expression(
                    exp.SerdeProperties, expressions=self._parse_wrapped_csv(self._parse_property)
                )

            return self.expression(
                exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties
            )

        self._match_text_seq("DELIMITED")

        kwargs = {}

        if self._match_text_seq("FIELDS", "TERMINATED", "BY"):
            kwargs["fields"] = self._parse_string()
            if self._match_text_seq("ESCAPED", "BY"):
                kwargs["escaped"] = self._parse_string()
        if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"):
            kwargs["collection_items"] = self._parse_string()
        if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"):
            kwargs["map_keys"] = self._parse_string()
        if self._match_text_seq("LINES", "TERMINATED", "BY"):
            kwargs["lines"] = self._parse_string()
        if self._match_text_seq("NULL", "DEFINED", "AS"):
            kwargs["null"] = self._parse_string()

        return self.expression(exp.RowFormatDelimitedProperty, **kwargs)  # type: ignore

    def _parse_load(self) -> exp.LoadData | exp.Command:
        if self._match_text_seq("DATA"):
            local = self._match_text_seq("LOCAL")
            self._match_text_seq("INPATH")
            inpath = self._parse_string()
            overwrite = self._match(TokenType.OVERWRITE)
            self._match_pair(TokenType.INTO, TokenType.TABLE)

            return self.expression(
                exp.LoadData,
                this=self._parse_table(schema=True),
                local=local,
                overwrite=overwrite,
                inpath=inpath,
                partition=self._parse_partition(),
                input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(),
                serde=self._match_text_seq("SERDE") and self._parse_string(),
            )
        return self._parse_as_command(self._prev)

    def _parse_delete(self) -> exp.Delete:
        # This handles MySQL's "Multiple-Table Syntax"
        # https://dev.mysql.com/doc/refman/8.0/en/delete.html
        tables = None
        comments = self._prev_comments
        if not self._match(TokenType.FROM, advance=False):
            tables = self._parse_csv(self._parse_table) or None

        returning = self._parse_returning()

        return self.expression(
            exp.Delete,
            comments=comments,
            tables=tables,
            this=self._match(TokenType.FROM) and self._parse_table(joins=True),
            using=self._match(TokenType.USING) and self._parse_table(joins=True),
            where=self._parse_where(),
            returning=returning or self._parse_returning(),
            limit=self._parse_limit(),
        )
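
    # Illustrative sketch (editor's note, not original source): the
    # multiple-table branch of _parse_delete fires when table names precede
    # FROM, e.g.
    #
    #     import sqlglot
    #     ast = sqlglot.parse_one("DELETE t1 FROM t1 JOIN t2 ON t1.id = t2.id", read="mysql")
    #     # ast.args["tables"] holds the leading table list; ast.this holds the
    #     # FROM table together with its joins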

    def _parse_update(self) -> exp.Update:
        comments = self._prev_comments
        this = self._parse_table(alias_tokens=self.UPDATE_ALIAS_TOKENS)
        expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality)
        returning = self._parse_returning()
        return self.expression(
            exp.Update,
            comments=comments,
            **{  # type: ignore
                "this": this,
                "expressions": expressions,
                "from": self._parse_from(joins=True),
                "where": self._parse_where(),
                "returning": returning or self._parse_returning(),
                "limit": self._parse_limit(),
            },
        )

    def _parse_uncache(self) -> exp.Uncache:
        if not self._match(TokenType.TABLE):
            self.raise_error("Expecting TABLE after UNCACHE")

        return self.expression(
            exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True)
        )

    def _parse_cache(self) -> exp.Cache:
        lazy = self._match_text_seq("LAZY")
        self._match(TokenType.TABLE)
        table = self._parse_table(schema=True)

        options = []
        if self._match_text_seq("OPTIONS"):
            self._match_l_paren()
            k = self._parse_string()
            self._match(TokenType.EQ)
            v = self._parse_string()
            options = [k, v]
            self._match_r_paren()

        self._match(TokenType.ALIAS)
        return self.expression(
            exp.Cache,
            this=table,
            lazy=lazy,
            options=options,
            expression=self._parse_select(nested=True),
        )

    def _parse_partition(self) -> t.Optional[exp.Partition]:
        if not self._match(TokenType.PARTITION):
            return None

        return self.expression(
            exp.Partition, expressions=self._parse_wrapped_csv(self._parse_conjunction)
        )

    def _parse_value(self) -> exp.Tuple:
        if self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(self._parse_conjunction)
            self._match_r_paren()
            return self.expression(exp.Tuple, expressions=expressions)

        # In presto we can have VALUES 1, 2 which results in 1 column & 2 rows.
        # https://prestodb.io/docs/current/sql/values.html
        return self.expression(exp.Tuple, expressions=[self._parse_conjunction()])

    def _parse_projections(self) -> t.List[exp.Expression]:
        return self._parse_expressions()

    def _parse_select(
        self, nested: bool = False, table: bool = False, parse_subquery_alias: bool = True
    ) -> t.Optional[exp.Expression]:
        cte = self._parse_with()

        if cte:
            this = self._parse_statement()

            if not this:
                self.raise_error("Failed to parse any statement following CTE")
                return cte

            if "with" in this.arg_types:
                this.set("with", cte)
            else:
                self.raise_error(f"{this.key} does not support CTE")
                this = cte

            return this

        # duckdb supports leading with FROM x
        from_ = self._parse_from() if self._match(TokenType.FROM, advance=False) else None

        if self._match(TokenType.SELECT):
            comments = self._prev_comments

            hint = self._parse_hint()
            all_ = self._match(TokenType.ALL)
            distinct = self._match_set(self.DISTINCT_TOKENS)

            kind = (
                self._match(TokenType.ALIAS)
                and self._match_texts(("STRUCT", "VALUE"))
                and self._prev.text
            )

            if distinct:
                distinct = self.expression(
                    exp.Distinct,
                    on=self._parse_value() if self._match(TokenType.ON) else None,
                )

            if all_ and distinct:
                self.raise_error("Cannot specify both ALL and DISTINCT after SELECT")

            limit = self._parse_limit(top=True)
            projections = self._parse_projections()

            this = self.expression(
                exp.Select,
                kind=kind,
                hint=hint,
                distinct=distinct,
                expressions=projections,
                limit=limit,
            )
            this.comments = comments

            into = self._parse_into()
            if into:
                this.set("into", into)

            if not from_:
                from_ = self._parse_from()

            if from_:
                this.set("from", from_)

            this = self._parse_query_modifiers(this)
        elif (table or nested) and self._match(TokenType.L_PAREN):
            if self._match(TokenType.PIVOT):
                this = self._parse_simplified_pivot()
            elif self._match(TokenType.FROM):
                this = exp.select("*").from_(
                    t.cast(exp.From, self._parse_from(skip_from_token=True))
                )
            else:
                this = self._parse_table() if table else self._parse_select(nested=True)
                this = self._parse_set_operations(self._parse_query_modifiers(this))

            self._match_r_paren()

            # We return early here so that the UNION isn't attached to the subquery by the
            # following call to _parse_set_operations, but instead becomes the parent node
            return self._parse_subquery(this, parse_alias=parse_subquery_alias)
        elif self._match(TokenType.VALUES):
            this = self.expression(
                exp.Values,
                expressions=self._parse_csv(self._parse_value),
                alias=self._parse_table_alias(),
            )
        elif from_:
            this = exp.select("*").from_(from_.this, copy=False)
        else:
            this = None

        return self._parse_set_operations(this)
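
    # Illustrative sketch (editor's note, not original source): the
    # leading-FROM branch above means that, under the duckdb dialect, a bare
    #
    #     import sqlglot
    #     sqlglot.parse_one("FROM tbl", read="duckdb").sql()
    #
    # is normalized to "SELECT * FROM tbl" via the `elif from_:` fallback.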

    def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]:
        if not skip_with_token and not self._match(TokenType.WITH):
            return None

        comments = self._prev_comments
        recursive = self._match(TokenType.RECURSIVE)

        expressions = []
        while True:
            expressions.append(self._parse_cte())

            if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH):
                break
            else:
                self._match(TokenType.WITH)

        return self.expression(
            exp.With, comments=comments, expressions=expressions, recursive=recursive
        )

    def _parse_cte(self) -> exp.CTE:
        alias = self._parse_table_alias()
        if not alias or not alias.this:
            self.raise_error("Expected CTE to have alias")

        self._match(TokenType.ALIAS)
        return self.expression(
            exp.CTE, this=self._parse_wrapped(self._parse_statement), alias=alias
        )

    def _parse_table_alias(
        self, alias_tokens: t.Optional[t.Collection[TokenType]] = None
    ) -> t.Optional[exp.TableAlias]:
        any_token = self._match(TokenType.ALIAS)
        alias = (
            self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
            or self._parse_string_as_identifier()
        )

        index = self._index
        if self._match(TokenType.L_PAREN):
            columns = self._parse_csv(self._parse_function_parameter)
            self._match_r_paren() if columns else self._retreat(index)
        else:
            columns = None

        if not alias and not columns:
            return None

        return self.expression(exp.TableAlias, this=alias, columns=columns)

    def _parse_subquery(
        self, this: t.Optional[exp.Expression], parse_alias: bool = True
    ) -> t.Optional[exp.Subquery]:
        if not this:
            return None

        return self.expression(
            exp.Subquery,
            this=this,
            pivots=self._parse_pivots(),
            alias=self._parse_table_alias() if parse_alias else None,
        )

    def _parse_query_modifiers(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        if isinstance(this, self.MODIFIABLES):
            for join in iter(self._parse_join, None):
                this.append("joins", join)
            for lateral in iter(self._parse_lateral, None):
                this.append("laterals", lateral)

            while True:
                if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False):
                    parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type]
                    key, expression = parser(self)

                    if expression:
                        this.set(key, expression)
                        if key == "limit":
                            offset = expression.args.pop("offset", None)
                            if offset:
                                this.set("offset", exp.Offset(expression=offset))
                        continue
                break
        return this

    def _parse_hint(self) -> t.Optional[exp.Hint]:
        if self._match(TokenType.HINT):
            hints = []
            for hint in iter(lambda: self._parse_csv(self._parse_function), []):
                hints.extend(hint)

            if not self._match_pair(TokenType.STAR, TokenType.SLASH):
                self.raise_error("Expected */ after HINT")

            return self.expression(exp.Hint, expressions=hints)

        return None

    def _parse_into(self) -> t.Optional[exp.Into]:
        if not self._match(TokenType.INTO):
            return None

        temp = self._match(TokenType.TEMPORARY)
        unlogged = self._match_text_seq("UNLOGGED")
        self._match(TokenType.TABLE)

        return self.expression(
            exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged
        )

    def _parse_from(
        self, joins: bool = False, skip_from_token: bool = False
    ) -> t.Optional[exp.From]:
        if not skip_from_token and not self._match(TokenType.FROM):
            return None

        return self.expression(
            exp.From, comments=self._prev_comments, this=self._parse_table(joins=joins)
        )

    def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]:
        if not self._match(TokenType.MATCH_RECOGNIZE):
            return None

        self._match_l_paren()

        partition = self._parse_partition_by()
        order = self._parse_order()
        measures = self._parse_expressions() if self._match_text_seq("MEASURES") else None

        if self._match_text_seq("ONE", "ROW", "PER", "MATCH"):
            rows = exp.var("ONE ROW PER MATCH")
        elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"):
            text = "ALL ROWS PER MATCH"
            if self._match_text_seq("SHOW", "EMPTY", "MATCHES"):
                text += " SHOW EMPTY MATCHES"
            elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"):
                text += " OMIT EMPTY MATCHES"
            elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"):
                text += " WITH UNMATCHED ROWS"
            rows = exp.var(text)
        else:
            rows = None

        if self._match_text_seq("AFTER", "MATCH", "SKIP"):
            text = "AFTER MATCH SKIP"
            if self._match_text_seq("PAST", "LAST", "ROW"):
                text += " PAST LAST ROW"
            elif self._match_text_seq("TO", "NEXT", "ROW"):
                text += " TO NEXT ROW"
            elif self._match_text_seq("TO", "FIRST"):
                text += f" TO FIRST {self._advance_any().text}"  # type: ignore
            elif self._match_text_seq("TO", "LAST"):
                text += f" TO LAST {self._advance_any().text}"  # type: ignore
            after = exp.var(text)
        else:
            after = None

        if self._match_text_seq("PATTERN"):
            self._match_l_paren()

            if not self._curr:
                self.raise_error("Expecting )", self._curr)

            paren = 1
            start = self._curr

            while self._curr and paren > 0:
                if self._curr.token_type == TokenType.L_PAREN:
                    paren += 1
                if self._curr.token_type == TokenType.R_PAREN:
                    paren -= 1

                end = self._prev
                self._advance()

            if paren > 0:
                self.raise_error("Expecting )", self._curr)

            pattern = exp.var(self._find_sql(start, end))
        else:
            pattern = None

        define = (
            self._parse_csv(
                lambda: self.expression(
                    exp.Alias,
                    alias=self._parse_id_var(any_token=True),
                    this=self._match(TokenType.ALIAS) and self._parse_conjunction(),
                )
            )
            if self._match_text_seq("DEFINE")
            else None
        )

        self._match_r_paren()

        return self.expression(
            exp.MatchRecognize,
            partition_by=partition,
            order=order,
            measures=measures,
            rows=rows,
            after=after,
            pattern=pattern,
            define=define,
            alias=self._parse_table_alias(),
        )
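
    # Illustrative sketch (editor's note, not original source):
    # _parse_match_recognize targets clauses of the shape
    #
    #     SELECT * FROM t MATCH_RECOGNIZE (
    #         PARTITION BY a ORDER BY b
    #         AFTER MATCH SKIP PAST LAST ROW
    #         PATTERN (x+ y)
    #         DEFINE x AS x.v > 0, y AS y.v <= 0
    #     )
    #
    # The PATTERN body is captured verbatim via paren counting and _find_sql,
    # since its regex-like syntax does not tokenize into ordinary SQL expressions.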

    def _parse_lateral(self) -> t.Optional[exp.Lateral]:
        outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY)
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY)

        if outer_apply or cross_apply:
            this = self._parse_select(table=True)
            view = None
            outer = not cross_apply
        elif self._match(TokenType.LATERAL):
            this = self._parse_select(table=True)
            view = self._match(TokenType.VIEW)
            outer = self._match(TokenType.OUTER)
        else:
            return None

        if not this:
            this = (
                self._parse_unnest()
                or self._parse_function()
                or self._parse_id_var(any_token=False)
            )

            while self._match(TokenType.DOT):
                this = exp.Dot(
                    this=this,
                    expression=self._parse_function() or self._parse_id_var(any_token=False),
                )

        if view:
            table = self._parse_id_var(any_token=False)
            columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else []
            table_alias: t.Optional[exp.TableAlias] = self.expression(
                exp.TableAlias, this=table, columns=columns
            )
        elif isinstance(this, exp.Subquery) and this.alias:
            # Ensures parity between the Subquery's and the Lateral's "alias" args
            table_alias = this.args["alias"].copy()
        else:
            table_alias = self._parse_table_alias()

        return self.expression(exp.Lateral, this=this, view=view, outer=outer, alias=table_alias)

    def _parse_join_parts(
        self,
    ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]:
        return (
            self._match_set(self.JOIN_METHODS) and self._prev,
            self._match_set(self.JOIN_SIDES) and self._prev,
            self._match_set(self.JOIN_KINDS) and self._prev,
        )

    def _parse_join(
        self, skip_join_token: bool = False, parse_bracket: bool = False
    ) -> t.Optional[exp.Join]:
        if self._match(TokenType.COMMA):
            return self.expression(exp.Join, this=self._parse_table())

        index = self._index
        method, side, kind = self._parse_join_parts()
        hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None
        join = self._match(TokenType.JOIN)

        if not skip_join_token and not join:
            self._retreat(index)
            kind = None
            method = None
            side = None

        outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False)
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False)

        if not skip_join_token and not join and not outer_apply and not cross_apply:
            return None

        if outer_apply:
            side = Token(TokenType.LEFT, "LEFT")

        kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)}

        if method:
            kwargs["method"] = method.text
        if side:
            kwargs["side"] = side.text
        if kind:
            kwargs["kind"] = kind.text
        if hint:
            kwargs["hint"] = hint

        if self._match(TokenType.ON):
            kwargs["on"] = self._parse_conjunction()
        elif self._match(TokenType.USING):
            kwargs["using"] = self._parse_wrapped_id_vars()
        elif not (kind and kind.token_type == TokenType.CROSS):
            index = self._index
            joins = self._parse_joins()

            if joins and self._match(TokenType.ON):
                kwargs["on"] = self._parse_conjunction()
            elif joins and self._match(TokenType.USING):
                kwargs["using"] = self._parse_wrapped_id_vars()
            else:
                joins = None
                self._retreat(index)

            kwargs["this"].set("joins", joins)

        comments = [c for token in (method, side, kind) if token for c in token.comments]
        return self.expression(exp.Join, comments=comments, **kwargs)
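
    # Illustrative sketch (editor's note, not original source): _parse_join_parts
    # splits a join prefix into method/side/kind tokens, so "NATURAL LEFT OUTER
    # JOIN t" yields method=NATURAL, side=LEFT, kind=OUTER. For example:
    #
    #     import sqlglot
    #     join = sqlglot.parse_one("SELECT * FROM a LEFT JOIN b USING (id)").args["joins"][0]
    #     # join.args["side"] == "LEFT"; join.args["using"] holds the id column list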

    def _parse_index(
        self,
        index: t.Optional[exp.Expression] = None,
    ) -> t.Optional[exp.Index]:
        if index:
            unique = None
            primary = None
            amp = None

            self._match(TokenType.ON)
            self._match(TokenType.TABLE)  # hive
            table = self._parse_table_parts(schema=True)
        else:
            unique = self._match(TokenType.UNIQUE)
            primary = self._match_text_seq("PRIMARY")
            amp = self._match_text_seq("AMP")

            if not self._match(TokenType.INDEX):
                return None

            index = self._parse_id_var()
            table = None

        using = self._parse_field() if self._match(TokenType.USING) else None

        if self._match(TokenType.L_PAREN, advance=False):
            columns = self._parse_wrapped_csv(self._parse_ordered)
        else:
            columns = None

        return self.expression(
            exp.Index,
            this=index,
            table=table,
            using=using,
            columns=columns,
            unique=unique,
            primary=primary,
            amp=amp,
            partition_by=self._parse_partition_by(),
        )

    def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]:
        hints: t.List[exp.Expression] = []
        if self._match_pair(TokenType.WITH, TokenType.L_PAREN):
            # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16
            hints.append(
                self.expression(
                    exp.WithTableHint,
                    expressions=self._parse_csv(
                        lambda: self._parse_function() or self._parse_var(any_token=True)
                    ),
                )
            )
            self._match_r_paren()
        else:
            # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html
            while self._match_set(self.TABLE_INDEX_HINT_TOKENS):
                hint = exp.IndexTableHint(this=self._prev.text.upper())

                self._match_texts({"INDEX", "KEY"})
                if self._match(TokenType.FOR):
                    hint.set("target", self._advance_any() and self._prev.text.upper())

                hint.set("expressions", self._parse_wrapped_id_vars())
                hints.append(hint)

        return hints or None

    def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]:
        return (
            (not schema and self._parse_function(optional_parens=False))
            or self._parse_id_var(any_token=False)
            or self._parse_string_as_identifier()
            or self._parse_placeholder()
        )

    def _parse_table_parts(self, schema: bool = False) -> exp.Table:
        catalog = None
        db = None
        table = self._parse_table_part(schema=schema)

        while self._match(TokenType.DOT):
            if catalog:
                # This allows nesting the table in arbitrarily many dot expressions if needed
                table = self.expression(
                    exp.Dot, this=table, expression=self._parse_table_part(schema=schema)
                )
            else:
                catalog = db
                db = table
                table = self._parse_table_part(schema=schema)

        if not table:
            self.raise_error(f"Expected table name but got {self._curr}")

        return self.expression(
            exp.Table, this=table, db=db, catalog=catalog, pivots=self._parse_pivots()
        )

    def _parse_table(
        self,
        schema: bool = False,
        joins: bool = False,
        alias_tokens: t.Optional[t.Collection[TokenType]] = None,
        parse_bracket: bool = False,
    ) -> t.Optional[exp.Expression]:
        lateral = self._parse_lateral()
        if lateral:
            return lateral

        unnest = self._parse_unnest()
        if unnest:
            return unnest

        values = self._parse_derived_table_values()
        if values:
            return values

        subquery = self._parse_select(table=True)
        if subquery:
            if not subquery.args.get("pivots"):
                subquery.set("pivots", self._parse_pivots())
            return subquery

        bracket = parse_bracket and self._parse_bracket(None)
        bracket = self.expression(exp.Table, this=bracket) if bracket else None
        this: exp.Expression = bracket or self._parse_table_parts(schema=schema)

        if schema:
            return self._parse_schema(this=this)

        if self.ALIAS_POST_TABLESAMPLE:
            table_sample = self._parse_table_sample()

        alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
        if alias:
            this.set("alias", alias)

        if not this.args.get("pivots"):
            this.set("pivots", self._parse_pivots())

        this.set("hints", self._parse_table_hints())

        if not self.ALIAS_POST_TABLESAMPLE:
            table_sample = self._parse_table_sample()

        if table_sample:
            table_sample.set("this", this)
            this = table_sample

        if joins:
            for join in iter(self._parse_join, None):
                this.append("joins", join)

        return this
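
    # Illustrative sketch (editor's note, not original source): _parse_table_parts
    # shifts dotted names so "catalog.db.tbl" fills all three slots, e.g.
    #
    #     import sqlglot
    #     tbl = sqlglot.parse_one("SELECT * FROM c.d.t").find(sqlglot.exp.Table)
    #     # tbl.catalog == "c", tbl.db == "d", tbl.name == "t"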

    def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]:
        if not self._match(TokenType.UNNEST):
            return None

        expressions = self._parse_wrapped_csv(self._parse_type)
        ordinality = self._match_pair(TokenType.WITH, TokenType.ORDINALITY)

        alias = self._parse_table_alias() if with_alias else None

        if alias and self.UNNEST_COLUMN_ONLY:
            if alias.args.get("columns"):
                self.raise_error("Unexpected extra column alias in unnest.")

            alias.set("columns", [alias.this])
            alias.set("this", None)

        offset = None
        if self._match_pair(TokenType.WITH, TokenType.OFFSET):
            self._match(TokenType.ALIAS)
            offset = self._parse_id_var() or exp.to_identifier("offset")

        return self.expression(
            exp.Unnest, expressions=expressions, ordinality=ordinality, alias=alias, offset=offset
        )

    def _parse_derived_table_values(self) -> t.Optional[exp.Values]:
        is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES)
        if not is_derived and not self._match(TokenType.VALUES):
            return None

        expressions = self._parse_csv(self._parse_value)
        alias = self._parse_table_alias()

        if is_derived:
            self._match_r_paren()

        return self.expression(
            exp.Values, expressions=expressions, alias=alias or self._parse_table_alias()
        )

    def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]:
        if not self._match(TokenType.TABLE_SAMPLE) and not (
            as_modifier and self._match_text_seq("USING", "SAMPLE")
        ):
            return None

        bucket_numerator = None
        bucket_denominator = None
        bucket_field = None
        percent = None
        rows = None
        size = None
        seed = None

        kind = (
            self._prev.text if self._prev.token_type == TokenType.TABLE_SAMPLE else "USING SAMPLE"
        )
        method = self._parse_var(tokens=(TokenType.ROW,))

        self._match(TokenType.L_PAREN)

        num = self._parse_number()

        if self._match_text_seq("BUCKET"):
            bucket_numerator = self._parse_number()
            self._match_text_seq("OUT", "OF")
            bucket_denominator = self._parse_number()
            self._match(TokenType.ON)
            bucket_field = self._parse_field()
        elif self._match_set((TokenType.PERCENT, TokenType.MOD)):
            percent = num
        elif self._match(TokenType.ROWS):
            rows = num
        else:
            size = num

        self._match(TokenType.R_PAREN)

        if self._match(TokenType.L_PAREN):
            method = self._parse_var()
            seed = self._match(TokenType.COMMA) and self._parse_number()
            self._match_r_paren()
        elif self._match_texts(("SEED", "REPEATABLE")):
            seed = self._parse_wrapped(self._parse_number)

        return self.expression(
            exp.TableSample,
            method=method,
            bucket_numerator=bucket_numerator,
            bucket_denominator=bucket_denominator,
            bucket_field=bucket_field,
            percent=percent,
            rows=rows,
            size=size,
            seed=seed,
            kind=kind,
        )

    def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]:
        return list(iter(self._parse_pivot, None)) or None

    def _parse_joins(self) -> t.Optional[t.List[exp.Join]]:
        return list(iter(self._parse_join, None)) or None
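
    # Illustrative sketch (editor's note, not original source): _parse_table_sample
    # covers both the Hive bucket form "TABLESAMPLE (BUCKET 1 OUT OF 4 ON x)",
    # which fills the numerator/denominator/field slots, and the size or percent
    # forms such as "TABLESAMPLE (10 PERCENT)", as well as duckdb's
    # "USING SAMPLE" modifier when called with as_modifier=True.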

    # https://duckdb.org/docs/sql/statements/pivot
    def _parse_simplified_pivot(self) -> exp.Pivot:
        def _parse_on() -> t.Optional[exp.Expression]:
            this = self._parse_bitwise()
            return self._parse_in(this) if self._match(TokenType.IN) else this

        this = self._parse_table()
        expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on)
        using = self._match(TokenType.USING) and self._parse_csv(
            lambda: self._parse_alias(self._parse_function())
        )
        group = self._parse_group()
        return self.expression(
            exp.Pivot, this=this, expressions=expressions, using=using, group=group
        )

    def _parse_pivot(self) -> t.Optional[exp.Pivot]:
        index = self._index
        include_nulls = None

        if self._match(TokenType.PIVOT):
            unpivot = False
        elif self._match(TokenType.UNPIVOT):
            unpivot = True

            # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax
            if self._match_text_seq("INCLUDE", "NULLS"):
                include_nulls = True
            elif self._match_text_seq("EXCLUDE", "NULLS"):
                include_nulls = False
        else:
            return None

        expressions = []
        field = None

        if not self._match(TokenType.L_PAREN):
            self._retreat(index)
            return None

        if unpivot:
            expressions = self._parse_csv(self._parse_column)
        else:
            expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function()))

        if not expressions:
            self.raise_error("Failed to parse PIVOT's aggregation list")

        if not self._match(TokenType.FOR):
            self.raise_error("Expecting FOR")

        value = self._parse_column()

        if not self._match(TokenType.IN):
            self.raise_error("Expecting IN")

        field = self._parse_in(value, alias=True)

        self._match_r_paren()

        pivot = self.expression(
            exp.Pivot,
            expressions=expressions,
            field=field,
            unpivot=unpivot,
            include_nulls=include_nulls,
        )

        if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False):
            pivot.set("alias", self._parse_table_alias())

        if not unpivot:
            names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions))

            columns: t.List[exp.Expression] = []
            for fld in pivot.args["field"].expressions:
                field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name
                for name in names:
                    if self.PREFIXED_PIVOT_COLUMNS:
                        name = f"{name}_{field_name}" if name else field_name
                    else:
                        name = f"{field_name}_{name}" if name else field_name

                    columns.append(exp.to_identifier(name))

            pivot.set("columns", columns)

        return pivot

    def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]:
        return [agg.alias for agg in aggregations]

    def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]:
        if not skip_where_token and not self._match(TokenType.WHERE):
            return None

        return self.expression(
            exp.Where, comments=self._prev_comments, this=self._parse_conjunction()
        )
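
    # Illustrative sketch (editor's note, not original source): for a non-UNPIVOT
    # clause such as "PIVOT (SUM(v) AS s FOR k IN ('a', 'b'))", the loop above
    # synthesizes output column names by combining each IN value with each
    # aggregation alias, honoring the dialect's PREFIXED_PIVOT_COLUMNS and
    # IDENTIFY_PIVOT_STRINGS settings.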

    def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]:
        if not skip_group_by_token and not self._match(TokenType.GROUP_BY):
            return None

        elements = defaultdict(list)

        if self._match(TokenType.ALL):
            return self.expression(exp.Group, all=True)

        while True:
            expressions = self._parse_csv(self._parse_conjunction)
            if expressions:
                elements["expressions"].extend(expressions)

            grouping_sets = self._parse_grouping_sets()
            if grouping_sets:
                elements["grouping_sets"].extend(grouping_sets)

            rollup = None
            cube = None
            totals = None

            with_ = self._match(TokenType.WITH)
            if self._match(TokenType.ROLLUP):
                rollup = with_ or self._parse_wrapped_csv(self._parse_column)
                elements["rollup"].extend(ensure_list(rollup))

            if self._match(TokenType.CUBE):
                cube = with_ or self._parse_wrapped_csv(self._parse_column)
                elements["cube"].extend(ensure_list(cube))

            if self._match_text_seq("TOTALS"):
                totals = True
                elements["totals"] = True  # type: ignore

            if not (grouping_sets or rollup or cube or totals):
                break

        return self.expression(exp.Group, **elements)  # type: ignore

    def _parse_grouping_sets(self) -> t.Optional[t.List[exp.Expression]]:
        if not self._match(TokenType.GROUPING_SETS):
            return None

        return self._parse_wrapped_csv(self._parse_grouping_set)

    def _parse_grouping_set(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.L_PAREN):
            grouping_set = self._parse_csv(self._parse_column)
            self._match_r_paren()
            return self.expression(exp.Tuple, expressions=grouping_set)

        return self._parse_column()

    def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]:
        if not skip_having_token and not self._match(TokenType.HAVING):
            return None
        return self.expression(exp.Having, this=self._parse_conjunction())

    def _parse_qualify(self) -> t.Optional[exp.Qualify]:
        if not self._match(TokenType.QUALIFY):
            return None
        return self.expression(exp.Qualify, this=self._parse_conjunction())

    def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]:
        if skip_start_token:
            start = None
        elif self._match(TokenType.START_WITH):
            start = self._parse_conjunction()
        else:
            return None

        self._match(TokenType.CONNECT_BY)
        self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression(
            exp.Prior, this=self._parse_bitwise()
        )
        connect = self._parse_conjunction()
        self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR")
        return self.expression(exp.Connect, start=start, connect=connect)

    def _parse_order(
        self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False
    ) -> t.Optional[exp.Expression]:
        if not skip_order_token and not self._match(TokenType.ORDER_BY):
            return this

        return self.expression(
            exp.Order, this=this, expressions=self._parse_csv(self._parse_ordered)
        )

    def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]:
        if not self._match(token):
            return None
        return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered))
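
    # Illustrative sketch (editor's note, not original source): _parse_group keeps
    # looping so that mixed modifiers accumulate into one exp.Group. For example,
    # "GROUP BY a, ROLLUP (b), CUBE (c)" fills the `expressions`, `rollup` and
    # `cube` buckets, while "GROUP BY a WITH ROLLUP" records the bare `with_`
    # flag in place of a column list.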

    def _parse_ordered(self) -> exp.Ordered:
        this = self._parse_conjunction()
        self._match(TokenType.ASC)

        is_desc = self._match(TokenType.DESC)
        is_nulls_first = self._match_text_seq("NULLS", "FIRST")
        is_nulls_last = self._match_text_seq("NULLS", "LAST")
        desc = is_desc or False
        asc = not desc
        nulls_first = is_nulls_first or False
        explicitly_null_ordered = is_nulls_first or is_nulls_last

        if (
            not explicitly_null_ordered
            and (
                (asc and self.NULL_ORDERING == "nulls_are_small")
                or (desc and self.NULL_ORDERING != "nulls_are_small")
            )
            and self.NULL_ORDERING != "nulls_are_last"
        ):
            nulls_first = True

        return self.expression(exp.Ordered, this=this, desc=desc, nulls_first=nulls_first)

    def _parse_limit(
        self, this: t.Optional[exp.Expression] = None, top: bool = False
    ) -> t.Optional[exp.Expression]:
        if self._match(TokenType.TOP if top else TokenType.LIMIT):
            comments = self._prev_comments
            if top:
                limit_paren = self._match(TokenType.L_PAREN)
                expression = self._parse_number()

                if limit_paren:
                    self._match_r_paren()
            else:
                expression = self._parse_term()

            if self._match(TokenType.COMMA):
                offset = expression
                expression = self._parse_term()
            else:
                offset = None

            limit_exp = self.expression(
                exp.Limit, this=this, expression=expression, offset=offset, comments=comments
            )

            return limit_exp

        if self._match(TokenType.FETCH):
            direction = self._match_set((TokenType.FIRST, TokenType.NEXT))
            direction = self._prev.text if direction else "FIRST"

            count = self._parse_number()
            percent = self._match(TokenType.PERCENT)

            self._match_set((TokenType.ROW, TokenType.ROWS))

            only = self._match_text_seq("ONLY")
            with_ties = self._match_text_seq("WITH", "TIES")

            if only and with_ties:
                self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause")

            return self.expression(
                exp.Fetch,
                direction=direction,
                count=count,
                percent=percent,
                with_ties=with_ties,
            )

        return this

    def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        if not self._match(TokenType.OFFSET):
            return this

        count = self._parse_term()
        self._match_set((TokenType.ROW, TokenType.ROWS))
        return self.expression(exp.Offset, this=this, expression=count)

    def _parse_locks(self) -> t.List[exp.Lock]:
        locks = []
        while True:
            if self._match_text_seq("FOR", "UPDATE"):
                update = True
            elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq(
                "LOCK", "IN", "SHARE", "MODE"
            ):
                update = False
            else:
                break

            expressions = None
            if self._match_text_seq("OF"):
                expressions = self._parse_csv(lambda: self._parse_table(schema=True))

            wait: t.Optional[bool | exp.Expression] = None
            if self._match_text_seq("NOWAIT"):
                wait = True
            elif self._match_text_seq("WAIT"):
                wait = self._parse_primary()
            elif self._match_text_seq("SKIP", "LOCKED"):
                wait = False

            locks.append(
                self.expression(exp.Lock, update=update, expressions=expressions, wait=wait)
            )

        return locks

    def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if not self._match_set(self.SET_OPERATIONS):
            return this

        token_type = self._prev.token_type

        if token_type == TokenType.UNION:
            expression = exp.Union
        elif token_type == TokenType.EXCEPT:
            expression = exp.Except
        else:
            expression = exp.Intersect

        return self.expression(
            expression,
            this=this,
            distinct=self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL),
            by_name=self._match_text_seq("BY", "NAME"),
            expression=self._parse_set_operations(self._parse_select(nested=True)),
        )
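
    # Illustrative sketch (editor's note, not original source): _parse_limit also
    # absorbs the MySQL "LIMIT offset, count" spelling, e.g.
    #
    #     import sqlglot
    #     ast = sqlglot.parse_one("SELECT x FROM t LIMIT 5, 10")
    #     # ast.args["limit"] carries the count (10); the 5 is re-attached as an
    #     # exp.Offset by the "limit" key handling in _parse_query_modifiers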

    def _parse_expression(self) -> t.Optional[exp.Expression]:
        return self._parse_alias(self._parse_conjunction())

    def _parse_conjunction(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_equality, self.CONJUNCTION)

    def _parse_equality(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_comparison, self.EQUALITY)

    def _parse_comparison(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_range, self.COMPARISON)

    def _parse_range(self) -> t.Optional[exp.Expression]:
        this = self._parse_bitwise()
        negate = self._match(TokenType.NOT)

        if self._match_set(self.RANGE_PARSERS):
            expression = self.RANGE_PARSERS[self._prev.token_type](self, this)
            if not expression:
                return this

            this = expression
        elif self._match(TokenType.ISNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())

        # Postgres supports ISNULL and NOTNULL for conditions.
        # https://blog.andreiavram.ro/postgresql-null-composite-type/
        if self._match(TokenType.NOTNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())
            this = self.expression(exp.Not, this=this)

        if negate:
            this = self.expression(exp.Not, this=this)

        if self._match(TokenType.IS):
            this = self._parse_is(this)

        return this

    def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        index = self._index - 1
        negate = self._match(TokenType.NOT)

        if self._match_text_seq("DISTINCT", "FROM"):
            klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ
            return self.expression(klass, this=this, expression=self._parse_expression())

        expression = self._parse_null() or self._parse_boolean()
        if not expression:
            self._retreat(index)
            return None

        this = self.expression(exp.Is, this=this, expression=expression)
        return self.expression(exp.Not, this=this) if negate else this

    def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In:
        unnest = self._parse_unnest(with_alias=False)
        if unnest:
            this = self.expression(exp.In, this=this, unnest=unnest)
        elif self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias))

            if len(expressions) == 1 and isinstance(expressions[0], exp.Subqueryable):
                this = self.expression(exp.In, this=this, query=expressions[0])
            else:
                this = self.expression(exp.In, this=this, expressions=expressions)

            self._match_r_paren(this)
        else:
            this = self.expression(exp.In, this=this, field=self._parse_field())

        return this

    def _parse_between(self, this: exp.Expression) -> exp.Between:
        low = self._parse_bitwise()
        self._match(TokenType.AND)
        high = self._parse_bitwise()
        return self.expression(exp.Between, this=this, low=low, high=high)

    def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if not self._match(TokenType.ESCAPE):
            return this
        return self.expression(exp.Escape, this=this, expression=self._parse_string())
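
    # Illustrative sketch (editor's note, not original source): _parse_is gives
    # null-safe comparisons their own nodes, so "a IS DISTINCT FROM b" becomes
    # exp.NullSafeNEQ and "a IS NOT DISTINCT FROM b" becomes exp.NullSafeEQ,
    # while a plain "a IS NULL" stays an exp.Is over exp.Null.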

    def _parse_interval(self) -> t.Optional[exp.Interval]:
        index = self._index

        if not self._match(TokenType.INTERVAL):
            return None

        if self._match(TokenType.STRING, advance=False):
            this = self._parse_primary()
        else:
            this = self._parse_term()

        if not this:
            self._retreat(index)
            return None

        unit = self._parse_function() or self._parse_var(any_token=True)

        # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse
        # each INTERVAL expression into this canonical form so it's easy to transpile
        if this and this.is_number:
            this = exp.Literal.string(this.name)
        elif this and this.is_string:
            parts = this.name.split()

            if len(parts) == 2:
                if unit:
                    # this is not actually a unit, it's something else
                    unit = None
                    self._retreat(self._index - 1)
                else:
                    this = exp.Literal.string(parts[0])
                    unit = self.expression(exp.Var, this=parts[1])

        return self.expression(exp.Interval, this=this, unit=unit)

    def _parse_bitwise(self) -> t.Optional[exp.Expression]:
        this = self._parse_term()

        while True:
            if self._match_set(self.BITWISE):
                this = self.expression(
                    self.BITWISE[self._prev.token_type],
                    this=this,
                    expression=self._parse_term(),
                )
            elif self._match(TokenType.DQMARK):
                this = self.expression(exp.Coalesce, this=this, expressions=self._parse_term())
            elif self._match_pair(TokenType.LT, TokenType.LT):
                this = self.expression(
                    exp.BitwiseLeftShift, this=this, expression=self._parse_term()
                )
            elif self._match_pair(TokenType.GT, TokenType.GT):
                this = self.expression(
                    exp.BitwiseRightShift, this=this, expression=self._parse_term()
                )
            else:
                break

        return this

    def _parse_term(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_factor, self.TERM)

    def _parse_factor(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_unary, self.FACTOR)

    def _parse_unary(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.UNARY_PARSERS):
            return self.UNARY_PARSERS[self._prev.token_type](self)
        return self._parse_at_time_zone(self._parse_type())

    def _parse_type(self) -> t.Optional[exp.Expression]:
        interval = self._parse_interval()
        if interval:
            return interval

        index = self._index
        data_type = self._parse_types(check_func=True, allow_identifiers=False)
        this = self._parse_column()

        if data_type:
            if isinstance(this, exp.Literal):
                parser = self.TYPE_LITERAL_PARSERS.get(data_type.this)
                if parser:
                    return parser(self, this, data_type)
                return self.expression(exp.Cast, this=this, to=data_type)
            if not data_type.expressions:
                self._retreat(index)
                return self._parse_column()
            return self._parse_column_ops(data_type)

        return this

    def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]:
        this = self._parse_type()
        if not this:
            return None

        return self.expression(
            exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True)
        )
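
    # Illustrative sketch (editor's note, not original source): per the
    # canonicalization above, both "INTERVAL '5' day" and "INTERVAL '5 day'"
    # should come out as roughly
    #
    #     exp.Interval(this=exp.Literal.string("5"), unit=exp.Var(this="day"))
    #
    # which keeps interval syntax uniform when transpiling across dialects.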

    def _parse_types(
        self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True
    ) -> t.Optional[exp.Expression]:
        index = self._index

        prefix = self._match_text_seq("SYSUDTLIB", ".")

        if not self._match_set(self.TYPE_TOKENS):
            identifier = allow_identifiers and self._parse_id_var(
                any_token=False, tokens=(TokenType.VAR,)
            )

            if identifier:
                tokens = self._tokenizer.tokenize(identifier.name)

                if len(tokens) != 1:
                    self.raise_error("Unexpected identifier", self._prev)

                if tokens[0].token_type in self.TYPE_TOKENS:
                    self._prev = tokens[0]
                elif self.SUPPORTS_USER_DEFINED_TYPES:
                    return identifier
                else:
                    return None
            else:
                return None

        type_token = self._prev.token_type

        if type_token == TokenType.PSEUDO_TYPE:
            return self.expression(exp.PseudoType, this=self._prev.text)

        nested = type_token in self.NESTED_TYPE_TOKENS
        is_struct = type_token in self.STRUCT_TYPE_TOKENS
        expressions = None
        maybe_func = False

        if self._match(TokenType.L_PAREN):
            if is_struct:
                expressions = self._parse_csv(self._parse_struct_types)
            elif nested:
                expressions = self._parse_csv(
                    lambda: self._parse_types(
                        check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                    )
                )
            elif type_token in self.ENUM_TYPE_TOKENS:
                expressions = self._parse_csv(self._parse_equality)
            else:
                expressions = self._parse_csv(self._parse_type_size)

            if not expressions or not self._match(TokenType.R_PAREN):
                self._retreat(index)
                return None

            maybe_func = True

        this: t.Optional[exp.Expression] = None
        values: t.Optional[t.List[exp.Expression]] = None

        if nested and self._match(TokenType.LT):
            if is_struct:
                expressions = self._parse_csv(self._parse_struct_types)
            else:
                expressions = self._parse_csv(
                    lambda: self._parse_types(
                        check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                    )
                )

            if not self._match(TokenType.GT):
                self.raise_error("Expecting >")

            if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)):
                values = self._parse_csv(self._parse_conjunction)
                self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN))

        if type_token in self.TIMESTAMPS:
            if self._match_text_seq("WITH", "TIME", "ZONE"):
                maybe_func = False
                tz_type = (
                    exp.DataType.Type.TIMETZ
                    if type_token in self.TIMES
                    else exp.DataType.Type.TIMESTAMPTZ
                )
                this = exp.DataType(this=tz_type, expressions=expressions)
            elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"):
                maybe_func = False
                this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions)
            elif self._match_text_seq("WITHOUT", "TIME", "ZONE"):
                maybe_func = False
        elif type_token == TokenType.INTERVAL:
            if self._match_text_seq("YEAR", "TO", "MONTH"):
                span: t.Optional[t.List[exp.Expression]] = [exp.IntervalYearToMonthSpan()]
            elif self._match_text_seq("DAY", "TO", "SECOND"):
                span = [exp.IntervalDayToSecondSpan()]
            else:
                span = None

            unit = not span and self._parse_var()
            if not unit:
                this = self.expression(
                    exp.DataType, this=exp.DataType.Type.INTERVAL, expressions=span
                )
            else:
                this = self.expression(exp.Interval, unit=unit)

        if maybe_func and check_func:
            index2 = self._index
            peek = self._parse_string()

            if not peek:
                self._retreat(index)
                return None

            self._retreat(index2)

        if not this:
            this = exp.DataType(
                this=exp.DataType.Type[type_token.value],
                expressions=expressions,
                nested=nested,
                values=values,
                prefix=prefix,
            )

        while self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET):
            this = exp.DataType(this=exp.DataType.Type.ARRAY, expressions=[this], nested=True)

        return this

    def _parse_struct_types(self) -> t.Optional[exp.Expression]:
        this = self._parse_type() or self._parse_id_var()
        self._match(TokenType.COLON)
        return self._parse_column_def(this)

    def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if not self._match_text_seq("AT", "TIME", "ZONE"):
            return this
        return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary())

    def _parse_column(self) -> t.Optional[exp.Expression]:
        this = self._parse_field()
        if isinstance(this, exp.Identifier):
            this = self.expression(exp.Column, this=this)
        elif not this:
            return self._parse_bracket(this)
        return self._parse_column_ops(this)

    def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        this = self._parse_bracket(this)

        while self._match_set(self.COLUMN_OPERATORS):
            op_token = self._prev.token_type
            op = self.COLUMN_OPERATORS.get(op_token)

            if op_token == TokenType.DCOLON:
                field = self._parse_types()
                if not field:
                    self.raise_error("Expected type")
            elif op and self._curr:
                self._advance()
                value = self._prev.text
                field = (
                    exp.Literal.number(value)
                    if self._prev.token_type == TokenType.NUMBER
                    else exp.Literal.string(value)
                )
            else:
                field = self._parse_field(anonymous_func=True, any_token=True)

            if isinstance(field, exp.Func):
                # bigquery allows function calls like x.y.count(...)
                # SAFE.SUBSTR(...)
                # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules
                this = self._replace_columns_with_dots(this)

            if op:
                this = op(self, this, field)
            elif isinstance(this, exp.Column) and not this.args.get("catalog"):
                this = self.expression(
                    exp.Column,
                    this=field,
                    table=this.this,
                    db=this.args.get("table"),
                    catalog=this.args.get("db"),
                )
            else:
                this = self.expression(exp.Dot, this=this, expression=field)
            this = self._parse_bracket(this)
        return this

    def _parse_primary(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.PRIMARY_PARSERS):
            token_type = self._prev.token_type
            primary = self.PRIMARY_PARSERS[token_type](self, self._prev)

            if token_type == TokenType.STRING:
                expressions = [primary]
                while self._match(TokenType.STRING):
                    expressions.append(exp.Literal.string(self._prev.text))

                if len(expressions) > 1:
                    return self.expression(exp.Concat, expressions=expressions)

            return primary

        if self._match_pair(TokenType.DOT, TokenType.NUMBER):
            return exp.Literal.number(f"0.{self._prev.text}")

        if self._match(TokenType.L_PAREN):
            comments = self._prev_comments
            query = self._parse_select()

            if query:
                expressions = [query]
            else:
                expressions = self._parse_expressions()

            this = self._parse_query_modifiers(seq_get(expressions, 0))

            if isinstance(this, exp.Subqueryable):
                this = self._parse_set_operations(
                    self._parse_subquery(this=this, parse_alias=False)
                )
            elif len(expressions) > 1:
                this = self.expression(exp.Tuple, expressions=expressions)
            else:
                this = self.expression(exp.Paren, this=self._parse_set_operations(this))

            if this:
                this.add_comments(comments)

            self._match_r_paren(expression=this)
            return this

        return None

    def _parse_field(
        self,
        any_token: bool = False,
        tokens: t.Optional[t.Collection[TokenType]] = None,
        anonymous_func: bool = False,
    ) -> t.Optional[exp.Expression]:
        return (
            self._parse_primary()
            or self._parse_function(anonymous=anonymous_func)
            or self._parse_id_var(any_token=any_token, tokens=tokens)
        )

    def _parse_function(
        self,
        functions: t.Optional[t.Dict[str, t.Callable]] = None,
        anonymous: bool = False,
        optional_parens: bool = True,
    ) -> t.Optional[exp.Expression]:
        if not self._curr:
            return None

        token_type = self._curr.token_type
        this = self._curr.text
        upper = this.upper()

        parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper)
        if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS:
            self._advance()
            return parser(self)

        if not self._next or self._next.token_type != TokenType.L_PAREN:
            if optional_parens and token_type in self.NO_PAREN_FUNCTIONS:
                self._advance()
                return self.expression(self.NO_PAREN_FUNCTIONS[token_type])

            return None

        if token_type not in self.FUNC_TOKENS:
            return None

        self._advance(2)

        parser = self.FUNCTION_PARSERS.get(upper)
        if parser and not anonymous:
            this = parser(self)
        else:
            subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type)

            if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH):
                this = self.expression(subquery_predicate, this=self._parse_select())
                self._match_r_paren()
                return this

            if functions is None:
                functions = self.FUNCTIONS

            function = functions.get(upper)

            alias = upper in self.FUNCTIONS_WITH_ALIASED_ARGS
            args = self._parse_csv(lambda: self._parse_lambda(alias=alias))

            if function and not anonymous:
                func = self.validate_expression(function(args), args)
                if not self.NORMALIZE_FUNCTIONS:
                    func.meta["name"] = this
                this = func
            else:
                this = self.expression(exp.Anonymous, this=this, expressions=args)

        self._match_r_paren(this)
        return self._parse_window(this)

    def _parse_function_parameter(self) -> t.Optional[exp.Expression]:
        return self._parse_column_def(self._parse_id_var())

    def _parse_user_defined_function(
        self, kind: t.Optional[TokenType] = None
    ) -> t.Optional[exp.Expression]:
        this = self._parse_id_var()

        while self._match(TokenType.DOT):
            this = self.expression(exp.Dot, this=this, expression=self._parse_id_var())

        if not self._match(TokenType.L_PAREN):
            return this

        expressions = self._parse_csv(self._parse_function_parameter)
        self._match_r_paren()
        return self.expression(
            exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True
        )

    def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier:
        literal = self._parse_primary()
        if literal:
            return self.expression(exp.Introducer, this=token.text, expression=literal)

        return self.expression(exp.Identifier, this=token.text)

    def _parse_session_parameter(self) -> exp.SessionParameter:
        kind = None
        this = self._parse_id_var() or self._parse_primary()

        if this and self._match(TokenType.DOT):
            kind = this.name
            this = self._parse_var() or self._parse_primary()

        return self.expression(exp.SessionParameter, this=this, kind=kind)
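
    # Illustrative sketch (editor's note, not original source): _parse_function's
    # fallbacks mean "CURRENT_DATE" parses without parentheses via
    # NO_PAREN_FUNCTIONS, a known name like "COALESCE(a, b)" builds a validated
    # exp.Coalesce, and an unknown "MY_UDF(a)" degrades to exp.Anonymous with
    # its argument list preserved.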
    def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]:
        index = self._index

        if self._match(TokenType.L_PAREN):
            expressions = t.cast(
                t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_id_var)
            )

            if not self._match(TokenType.R_PAREN):
                self._retreat(index)
        else:
            expressions = [self._parse_id_var()]

        if self._match_set(self.LAMBDAS):
            return self.LAMBDAS[self._prev.token_type](self, expressions)

        self._retreat(index)

        this: t.Optional[exp.Expression]

        if self._match(TokenType.DISTINCT):
            this = self.expression(
                exp.Distinct, expressions=self._parse_csv(self._parse_conjunction)
            )
        else:
            this = self._parse_select_or_expression(alias=alias)

        return self._parse_limit(self._parse_order(self._parse_respect_or_ignore_nulls(this)))

    def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        index = self._index

        if not self.errors:
            try:
                if self._parse_select(nested=True):
                    return this
            except ParseError:
                pass
            finally:
                self.errors.clear()
                self._retreat(index)

        if not self._match(TokenType.L_PAREN):
            return this

        args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def())

        self._match_r_paren()
        return self.expression(exp.Schema, this=this, expressions=args)

    def _parse_field_def(self) -> t.Optional[exp.Expression]:
        return self._parse_column_def(self._parse_field(any_token=True))

    def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        # column defs are not really columns, they're identifiers
        if isinstance(this, exp.Column):
            this = this.this

        kind = self._parse_types(schema=True)

        if self._match_text_seq("FOR", "ORDINALITY"):
            return self.expression(exp.ColumnDef, this=this, ordinality=True)

        constraints: t.List[exp.Expression] = []

        if not kind and self._match(TokenType.ALIAS):
            constraints.append(
                self.expression(
                    exp.ComputedColumnConstraint,
                    this=self._parse_conjunction(),
                    persisted=self._match_text_seq("PERSISTED"),
                    not_null=self._match_pair(TokenType.NOT, TokenType.NULL),
                )
            )

        while True:
            constraint = self._parse_column_constraint()
            if not constraint:
                break
            constraints.append(constraint)

        if not kind and not constraints:
            return this

        return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints)

    def _parse_auto_increment(
        self,
    ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint:
        start = None
        increment = None

        if self._match(TokenType.L_PAREN, advance=False):
            args = self._parse_wrapped_csv(self._parse_bitwise)
            start = seq_get(args, 0)
            increment = seq_get(args, 1)
        elif self._match_text_seq("START"):
            start = self._parse_bitwise()
            self._match_text_seq("INCREMENT")
            increment = self._parse_bitwise()

        if start and increment:
            return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment)

        return exp.AutoIncrementColumnConstraint()
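    # For example, once the caller has consumed the AUTO_INCREMENT / AUTOINCREMENT
    # keyword, "(2, 5)" or "START 2 INCREMENT 5" both give _parse_auto_increment a
    # start and an increment, so it returns a GeneratedAsIdentityColumnConstraint;
    # a bare keyword falls through to AutoIncrementColumnConstraint.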
    def _parse_compress(self) -> exp.CompressColumnConstraint:
        if self._match(TokenType.L_PAREN, advance=False):
            return self.expression(
                exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise)
            )

        return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise())

    def _parse_generated_as_identity(self) -> exp.GeneratedAsIdentityColumnConstraint:
        if self._match_text_seq("BY", "DEFAULT"):
            on_null = self._match_pair(TokenType.ON, TokenType.NULL)
            this = self.expression(
                exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null
            )
        else:
            self._match_text_seq("ALWAYS")
            this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True)

        self._match(TokenType.ALIAS)
        identity = self._match_text_seq("IDENTITY")

        if self._match(TokenType.L_PAREN):
            if self._match(TokenType.START_WITH):
                this.set("start", self._parse_bitwise())
            if self._match_text_seq("INCREMENT", "BY"):
                this.set("increment", self._parse_bitwise())
            if self._match_text_seq("MINVALUE"):
                this.set("minvalue", self._parse_bitwise())
            if self._match_text_seq("MAXVALUE"):
                this.set("maxvalue", self._parse_bitwise())

            if self._match_text_seq("CYCLE"):
                this.set("cycle", True)
            elif self._match_text_seq("NO", "CYCLE"):
                this.set("cycle", False)

            if not identity:
                this.set("expression", self._parse_bitwise())

            self._match_r_paren()

        return this

    def _parse_inline(self) -> exp.InlineLengthColumnConstraint:
        self._match_text_seq("LENGTH")
        return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise())

    def _parse_not_constraint(
        self,
    ) -> t.Optional[exp.Expression]:
        if self._match_text_seq("NULL"):
            return self.expression(exp.NotNullColumnConstraint)
        if self._match_text_seq("CASESPECIFIC"):
            return self.expression(exp.CaseSpecificColumnConstraint, not_=True)
        if self._match_text_seq("FOR", "REPLICATION"):
            return self.expression(exp.NotForReplicationColumnConstraint)
        return None

    def _parse_column_constraint(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.CONSTRAINT):
            this = self._parse_id_var()
        else:
            this = None

        if self._match_texts(self.CONSTRAINT_PARSERS):
            return self.expression(
                exp.ColumnConstraint,
                this=this,
                kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self),
            )

        return this

    def _parse_constraint(self) -> t.Optional[exp.Expression]:
        if not self._match(TokenType.CONSTRAINT):
            return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS)

        this = self._parse_id_var()
        expressions = []

        while True:
            constraint = self._parse_unnamed_constraint() or self._parse_function()
            if not constraint:
                break
            expressions.append(constraint)

        return self.expression(exp.Constraint, this=this, expressions=expressions)

    def _parse_unnamed_constraint(
        self, constraints: t.Optional[t.Collection[str]] = None
    ) -> t.Optional[exp.Expression]:
        if not self._match_texts(constraints or self.CONSTRAINT_PARSERS):
            return None

        constraint = self._prev.text.upper()
        if constraint not in self.CONSTRAINT_PARSERS:
            self.raise_error(f"No parser found for schema constraint {constraint}.")

        return self.CONSTRAINT_PARSERS[constraint](self)
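    # For example, "CONSTRAINT pk PRIMARY KEY (id)" becomes an exp.Constraint named pk
    # wrapping the PRIMARY KEY kind, while an unnamed "PRIMARY KEY (id)" is dispatched
    # directly through CONSTRAINT_PARSERS by _parse_unnamed_constraint above.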
    def _parse_unique(self) -> exp.UniqueColumnConstraint:
        self._match_text_seq("KEY")
        return self.expression(
            exp.UniqueColumnConstraint, this=self._parse_schema(self._parse_id_var(any_token=False))
        )

    def _parse_key_constraint_options(self) -> t.List[str]:
        options = []
        while True:
            if not self._curr:
                break

            if self._match(TokenType.ON):
                action = None
                on = self._advance_any() and self._prev.text

                if self._match_text_seq("NO", "ACTION"):
                    action = "NO ACTION"
                elif self._match_text_seq("CASCADE"):
                    action = "CASCADE"
                elif self._match_pair(TokenType.SET, TokenType.NULL):
                    action = "SET NULL"
                elif self._match_pair(TokenType.SET, TokenType.DEFAULT):
                    action = "SET DEFAULT"
                else:
                    self.raise_error("Invalid key constraint")

                options.append(f"ON {on} {action}")
            elif self._match_text_seq("NOT", "ENFORCED"):
                options.append("NOT ENFORCED")
            elif self._match_text_seq("DEFERRABLE"):
                options.append("DEFERRABLE")
            elif self._match_text_seq("INITIALLY", "DEFERRED"):
                options.append("INITIALLY DEFERRED")
            elif self._match_text_seq("NORELY"):
                options.append("NORELY")
            elif self._match_text_seq("MATCH", "FULL"):
                options.append("MATCH FULL")
            else:
                break

        return options

    def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]:
        if match and not self._match(TokenType.REFERENCES):
            return None

        expressions = None
        this = self._parse_table(schema=True)
        options = self._parse_key_constraint_options()
        return self.expression(exp.Reference, this=this, expressions=expressions, options=options)

    def _parse_foreign_key(self) -> exp.ForeignKey:
        expressions = self._parse_wrapped_id_vars()
        reference = self._parse_references()
        options = {}

        while self._match(TokenType.ON):
            if not self._match_set((TokenType.DELETE, TokenType.UPDATE)):
                self.raise_error("Expected DELETE or UPDATE")

            kind = self._prev.text.lower()

            if self._match_text_seq("NO", "ACTION"):
                action = "NO ACTION"
            elif self._match(TokenType.SET):
                self._match_set((TokenType.NULL, TokenType.DEFAULT))
                action = "SET " + self._prev.text.upper()
            else:
                self._advance()
                action = self._prev.text.upper()

            options[kind] = action

        return self.expression(
            exp.ForeignKey, expressions=expressions, reference=reference, **options  # type: ignore
        )

    def _parse_primary_key(
        self, wrapped_optional: bool = False, in_props: bool = False
    ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey:
        desc = (
            self._match_set((TokenType.ASC, TokenType.DESC))
            and self._prev.token_type == TokenType.DESC
        )

        if not in_props and not self._match(TokenType.L_PAREN, advance=False):
            return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc)

        expressions = self._parse_wrapped_csv(self._parse_field, optional=wrapped_optional)
        options = self._parse_key_constraint_options()
        return self.expression(exp.PrimaryKey, expressions=expressions, options=options)
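    # For example, in "FOREIGN KEY (a) REFERENCES t ON DELETE CASCADE" the wrapped id
    # list [a] becomes the expressions of exp.ForeignKey, and the REFERENCES clause,
    # including the "ON DELETE CASCADE" option collected by
    # _parse_key_constraint_options, becomes its exp.Reference.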
    def _parse_bracket(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)):
            return this

        bracket_kind = self._prev.token_type

        if self._match(TokenType.COLON):
            expressions: t.List[exp.Expression] = [
                self.expression(exp.Slice, expression=self._parse_conjunction())
            ]
        else:
            expressions = self._parse_csv(
                lambda: self._parse_slice(
                    self._parse_alias(self._parse_conjunction(), explicit=True)
                )
            )

        # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs
        if bracket_kind == TokenType.L_BRACE:
            this = self.expression(exp.Struct, expressions=expressions)
        elif not this or this.name.upper() == "ARRAY":
            this = self.expression(exp.Array, expressions=expressions)
        else:
            expressions = apply_index_offset(this, expressions, -self.INDEX_OFFSET)
            this = self.expression(exp.Bracket, this=this, expressions=expressions)

        if not self._match(TokenType.R_BRACKET) and bracket_kind == TokenType.L_BRACKET:
            self.raise_error("Expected ]")
        elif not self._match(TokenType.R_BRACE) and bracket_kind == TokenType.L_BRACE:
            self.raise_error("Expected }")

        self._add_comments(this)
        return self._parse_bracket(this)

    def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if self._match(TokenType.COLON):
            return self.expression(exp.Slice, this=this, expression=self._parse_conjunction())
        return this

    def _parse_case(self) -> t.Optional[exp.Expression]:
        ifs = []
        default = None

        comments = self._prev_comments
        expression = self._parse_conjunction()

        while self._match(TokenType.WHEN):
            this = self._parse_conjunction()
            self._match(TokenType.THEN)
            then = self._parse_conjunction()
            ifs.append(self.expression(exp.If, this=this, true=then))

        if self._match(TokenType.ELSE):
            default = self._parse_conjunction()

        if not self._match(TokenType.END):
            self.raise_error("Expected END after CASE", self._prev)

        return self._parse_window(
            self.expression(exp.Case, comments=comments, this=expression, ifs=ifs, default=default)
        )

    def _parse_if(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.L_PAREN):
            args = self._parse_csv(self._parse_conjunction)
            this = self.validate_expression(exp.If.from_arg_list(args), args)
            self._match_r_paren()
        else:
            index = self._index - 1
            condition = self._parse_conjunction()

            if not condition:
                self._retreat(index)
                return None

            self._match(TokenType.THEN)
            true = self._parse_conjunction()
            false = self._parse_conjunction() if self._match(TokenType.ELSE) else None
            self._match(TokenType.END)
            this = self.expression(exp.If, this=condition, true=true, false=false)

        return self._parse_window(this)

    def _parse_next_value_for(self) -> t.Optional[exp.Expression]:
        if not self._match_text_seq("VALUE", "FOR"):
            self._retreat(self._index - 1)
            return None

        return self.expression(
            exp.NextValueFor,
            this=self._parse_column(),
            order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order),
        )

    def _parse_extract(self) -> exp.Extract:
        this = self._parse_function() or self._parse_var() or self._parse_type()

        if self._match(TokenType.FROM):
            return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

        if not self._match(TokenType.COMMA):
            self.raise_error("Expected FROM or comma after EXTRACT", self._prev)

        return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())
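    # For example, "EXTRACT(year FROM d)" and the comma form "EXTRACT(year, d)" both
    # produce the same exp.Extract node via _parse_extract above.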
    def _parse_any_value(self) -> exp.AnyValue:
        this = self._parse_lambda()
        is_max = None
        having = None

        if self._match(TokenType.HAVING):
            self._match_texts(("MAX", "MIN"))
            is_max = self._prev.text == "MAX"
            having = self._parse_column()

        return self.expression(exp.AnyValue, this=this, having=having, max=is_max)

    def _parse_cast(self, strict: bool) -> exp.Expression:
        this = self._parse_conjunction()

        if not self._match(TokenType.ALIAS):
            if self._match(TokenType.COMMA):
                return self.expression(exp.CastToStrType, this=this, to=self._parse_string())

            self.raise_error("Expected AS after CAST")

        fmt = None
        to = self._parse_types()

        if not to:
            self.raise_error("Expected TYPE after CAST")
        elif isinstance(to, exp.Identifier):
            to = exp.DataType.build(to.name, udt=True)
        elif to.this == exp.DataType.Type.CHAR:
            if self._match(TokenType.CHARACTER_SET):
                to = self.expression(exp.CharacterSet, this=self._parse_var_or_string())
        elif self._match(TokenType.FORMAT):
            fmt_string = self._parse_string()
            fmt = self._parse_at_time_zone(fmt_string)

            if to.this in exp.DataType.TEMPORAL_TYPES:
                this = self.expression(
                    exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime,
                    this=this,
                    format=exp.Literal.string(
                        format_time(
                            fmt_string.this if fmt_string else "",
                            self.FORMAT_MAPPING or self.TIME_MAPPING,
                            self.FORMAT_TRIE or self.TIME_TRIE,
                        )
                    ),
                )

                if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime):
                    this.set("zone", fmt.args["zone"])

                return this

        return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, format=fmt)

    def _parse_concat(self) -> t.Optional[exp.Expression]:
        args = self._parse_csv(self._parse_conjunction)
        if self.CONCAT_NULL_OUTPUTS_STRING:
            args = [
                exp.func("COALESCE", exp.cast(arg, "text"), exp.Literal.string(""))
                for arg in args
                if arg
            ]

        # Some dialects (e.g. Trino) don't allow a single-argument CONCAT call, so when
        # we find such a call we replace it with its argument.
        if len(args) == 1:
            return args[0]

        return self.expression(
            exp.Concat if self.STRICT_STRING_CONCAT else exp.SafeConcat, expressions=args
        )
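    # For example, with CONCAT_NULL_OUTPUTS_STRING set, CONCAT(a, b) is rewritten above
    # to CONCAT(COALESCE(CAST(a AS TEXT), ''), COALESCE(CAST(b AS TEXT), '')), and a
    # single-argument CONCAT(a) collapses to just a.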
    def _parse_string_agg(self) -> exp.Expression:
        if self._match(TokenType.DISTINCT):
            args: t.List[t.Optional[exp.Expression]] = [
                self.expression(exp.Distinct, expressions=[self._parse_conjunction()])
            ]
            if self._match(TokenType.COMMA):
                args.extend(self._parse_csv(self._parse_conjunction))
        else:
            args = self._parse_csv(self._parse_conjunction)  # type: ignore

        index = self._index
        if not self._match(TokenType.R_PAREN) and args:
            # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]])
            # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n])
            args[-1] = self._parse_limit(this=self._parse_order(this=args[-1]))
            return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1))

        # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]).
        # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that
        # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them.
        if not self._match_text_seq("WITHIN", "GROUP"):
            self._retreat(index)
            return self.validate_expression(exp.GroupConcat.from_arg_list(args), args)

        self._match_l_paren()  # The corresponding match_r_paren will be called in parse_function (caller)
        order = self._parse_order(this=seq_get(args, 0))
        return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1))

    def _parse_convert(self, strict: bool) -> t.Optional[exp.Expression]:
        this = self._parse_bitwise()

        if self._match(TokenType.USING):
            to: t.Optional[exp.Expression] = self.expression(
                exp.CharacterSet, this=self._parse_var()
            )
        elif self._match(TokenType.COMMA):
            to = self._parse_types()
        else:
            to = None

        return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to)

    def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]:
        """
        There are generally two variants of the DECODE function:

        - DECODE(bin, charset)
        - DECODE(expression, search, result [, search, result] ... [, default])

        The second variant will always be parsed into a CASE expression. Note that NULL
        needs special treatment, since we need to explicitly check for it with `IS NULL`,
        instead of relying on pattern matching.
        """
        args = self._parse_csv(self._parse_conjunction)

        if len(args) < 3:
            return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1))

        expression, *expressions = args
        if not expression:
            return None

        ifs = []
        for search, result in zip(expressions[::2], expressions[1::2]):
            if not search or not result:
                return None

            if isinstance(search, exp.Literal):
                ifs.append(
                    exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result)
                )
            elif isinstance(search, exp.Null):
                ifs.append(
                    exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result)
                )
            else:
                cond = exp.or_(
                    exp.EQ(this=expression.copy(), expression=search),
                    exp.and_(
                        exp.Is(this=expression.copy(), expression=exp.Null()),
                        exp.Is(this=search.copy(), expression=exp.Null()),
                        copy=False,
                    ),
                    copy=False,
                )
                ifs.append(exp.If(this=cond, true=result))

        return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None)

    def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]:
        self._match_text_seq("KEY")
        key = self._parse_field()
        self._match(TokenType.COLON)
        self._match_text_seq("VALUE")
        value = self._parse_field()

        if not key and not value:
            return None
        return self.expression(exp.JSONKeyValue, this=key, expression=value)
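    # For example, _parse_decode above turns DECODE(x, 1, 'one', NULL, 'missing', 'other')
    # into CASE WHEN x = 1 THEN 'one' WHEN x IS NULL THEN 'missing' ELSE 'other' END.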
"JSON") 4134 encoding = self._match_text_seq("ENCODING") and self._parse_var() 4135 4136 return self.expression( 4137 exp.JSONObject, 4138 expressions=expressions, 4139 null_handling=null_handling, 4140 unique_keys=unique_keys, 4141 return_type=return_type, 4142 format_json=format_json, 4143 encoding=encoding, 4144 ) 4145 4146 def _parse_logarithm(self) -> exp.Func: 4147 # Default argument order is base, expression 4148 args = self._parse_csv(self._parse_range) 4149 4150 if len(args) > 1: 4151 if not self.LOG_BASE_FIRST: 4152 args.reverse() 4153 return exp.Log.from_arg_list(args) 4154 4155 return self.expression( 4156 exp.Ln if self.LOG_DEFAULTS_TO_LN else exp.Log, this=seq_get(args, 0) 4157 ) 4158 4159 def _parse_match_against(self) -> exp.MatchAgainst: 4160 expressions = self._parse_csv(self._parse_column) 4161 4162 self._match_text_seq(")", "AGAINST", "(") 4163 4164 this = self._parse_string() 4165 4166 if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"): 4167 modifier = "IN NATURAL LANGUAGE MODE" 4168 if self._match_text_seq("WITH", "QUERY", "EXPANSION"): 4169 modifier = f"{modifier} WITH QUERY EXPANSION" 4170 elif self._match_text_seq("IN", "BOOLEAN", "MODE"): 4171 modifier = "IN BOOLEAN MODE" 4172 elif self._match_text_seq("WITH", "QUERY", "EXPANSION"): 4173 modifier = "WITH QUERY EXPANSION" 4174 else: 4175 modifier = None 4176 4177 return self.expression( 4178 exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier 4179 ) 4180 4181 # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16 4182 def _parse_open_json(self) -> exp.OpenJSON: 4183 this = self._parse_bitwise() 4184 path = self._match(TokenType.COMMA) and self._parse_string() 4185 4186 def _parse_open_json_column_def() -> exp.OpenJSONColumnDef: 4187 this = self._parse_field(any_token=True) 4188 kind = self._parse_types() 4189 path = self._parse_string() 4190 as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON) 4191 4192 return self.expression( 4193 exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json 4194 ) 4195 4196 expressions = None 4197 if self._match_pair(TokenType.R_PAREN, TokenType.WITH): 4198 self._match_l_paren() 4199 expressions = self._parse_csv(_parse_open_json_column_def) 4200 4201 return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions) 4202 4203 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 4204 args = self._parse_csv(self._parse_bitwise) 4205 4206 if self._match(TokenType.IN): 4207 return self.expression( 4208 exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0) 4209 ) 4210 4211 if haystack_first: 4212 haystack = seq_get(args, 0) 4213 needle = seq_get(args, 1) 4214 else: 4215 needle = seq_get(args, 0) 4216 haystack = seq_get(args, 1) 4217 4218 return self.expression( 4219 exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2) 4220 ) 4221 4222 def _parse_join_hint(self, func_name: str) -> exp.JoinHint: 4223 args = self._parse_csv(self._parse_table) 4224 return exp.JoinHint(this=func_name.upper(), expressions=args) 4225 4226 def _parse_substring(self) -> exp.Substring: 4227 # Postgres supports the form: substring(string [from int] [for int]) 4228 # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6 4229 4230 args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise)) 4231 4232 if self._match(TokenType.FROM): 4233 args.append(self._parse_bitwise()) 4234 if self._match(TokenType.FOR): 
    def _parse_substring(self) -> exp.Substring:
        # Postgres supports the form: substring(string [from int] [for int])
        # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6

        args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise))

        if self._match(TokenType.FROM):
            args.append(self._parse_bitwise())
            if self._match(TokenType.FOR):
                args.append(self._parse_bitwise())

        return self.validate_expression(exp.Substring.from_arg_list(args), args)

    def _parse_trim(self) -> exp.Trim:
        # https://www.w3resource.com/sql/character-functions/trim.php
        # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html

        position = None
        collation = None

        if self._match_texts(self.TRIM_TYPES):
            position = self._prev.text.upper()

        expression = self._parse_bitwise()
        if self._match_set((TokenType.FROM, TokenType.COMMA)):
            this = self._parse_bitwise()
        else:
            this = expression
            expression = None

        if self._match(TokenType.COLLATE):
            collation = self._parse_bitwise()

        return self.expression(
            exp.Trim, this=this, position=position, expression=expression, collation=collation
        )

    def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]:
        return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window)

    def _parse_named_window(self) -> t.Optional[exp.Expression]:
        return self._parse_window(self._parse_id_var(), alias=True)

    def _parse_respect_or_ignore_nulls(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        if self._match_text_seq("IGNORE", "NULLS"):
            return self.expression(exp.IgnoreNulls, this=this)
        if self._match_text_seq("RESPECT", "NULLS"):
            return self.expression(exp.RespectNulls, this=this)
        return this
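    # For example, "LAST_VALUE(x) IGNORE NULLS OVER (...)" first wraps the function in
    # exp.IgnoreNulls in _parse_respect_or_ignore_nulls above, and the OVER clause is
    # then parsed by _parse_window below.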
    def _parse_window(
        self, this: t.Optional[exp.Expression], alias: bool = False
    ) -> t.Optional[exp.Expression]:
        if self._match_pair(TokenType.FILTER, TokenType.L_PAREN):
            self._match(TokenType.WHERE)
            this = self.expression(
                exp.Filter, this=this, expression=self._parse_where(skip_where_token=True)
            )
            self._match_r_paren()

        # T-SQL allows the OVER (...) syntax after WITHIN GROUP.
        # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16
        if self._match_text_seq("WITHIN", "GROUP"):
            order = self._parse_wrapped(self._parse_order)
            this = self.expression(exp.WithinGroup, this=this, expression=order)

        # The SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] clause before OVER;
        # some dialects implement it and some do not.
        # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html

        # There is some code above in _parse_lambda that handles
        #   SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ...

        # The code below handles
        #   SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ...

        # Oracle allows both formats
        # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html)
        # and Snowflake chose to do the same for familiarity:
        # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes
        this = self._parse_respect_or_ignore_nulls(this)

        # bigquery select from window x AS (partition by ...)
        if alias:
            over = None
            self._match(TokenType.ALIAS)
        elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS):
            return this
        else:
            over = self._prev.text.upper()

        if not self._match(TokenType.L_PAREN):
            return self.expression(
                exp.Window, this=this, alias=self._parse_id_var(False), over=over
            )

        window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS)

        first = self._match(TokenType.FIRST)
        if self._match_text_seq("LAST"):
            first = False

        partition, order = self._parse_partition_and_order()
        kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text

        if kind:
            self._match(TokenType.BETWEEN)
            start = self._parse_window_spec()
            self._match(TokenType.AND)
            end = self._parse_window_spec()

            spec = self.expression(
                exp.WindowSpec,
                kind=kind,
                start=start["value"],
                start_side=start["side"],
                end=end["value"],
                end_side=end["side"],
            )
        else:
            spec = None

        self._match_r_paren()

        window = self.expression(
            exp.Window,
            this=this,
            partition_by=partition,
            order=order,
            spec=spec,
            alias=window_alias,
            over=over,
            first=first,
        )

        # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...)
        if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False):
            return self._parse_window(window, alias=alias)

        return window

    def _parse_partition_and_order(
        self,
    ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]:
        return self._parse_partition_by(), self._parse_order()

    def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]:
        self._match(TokenType.BETWEEN)

        return {
            "value": (
                (self._match_text_seq("UNBOUNDED") and "UNBOUNDED")
                or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW")
                or self._parse_bitwise()
            ),
            "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text,
        }

    def _parse_alias(
        self, this: t.Optional[exp.Expression], explicit: bool = False
    ) -> t.Optional[exp.Expression]:
        any_token = self._match(TokenType.ALIAS)

        if explicit and not any_token:
            return this

        if self._match(TokenType.L_PAREN):
            aliases = self.expression(
                exp.Aliases,
                this=this,
                expressions=self._parse_csv(lambda: self._parse_id_var(any_token)),
            )
            self._match_r_paren(aliases)
            return aliases

        alias = self._parse_id_var(any_token)

        if alias:
            return self.expression(exp.Alias, this=this, alias=alias)

        return this
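    # For example, "x AS y" and the implicit form "x y" both become exp.Alias in
    # _parse_alias above, while "t AS (a, b)" produces exp.Aliases with one
    # identifier per output column.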
    def _parse_id_var(
        self,
        any_token: bool = True,
        tokens: t.Optional[t.Collection[TokenType]] = None,
    ) -> t.Optional[exp.Expression]:
        identifier = self._parse_identifier()

        if identifier:
            return identifier

        if (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS):
            quoted = self._prev.token_type == TokenType.STRING
            return exp.Identifier(this=self._prev.text, quoted=quoted)

        return None

    def _parse_string(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.STRING):
            return self.PRIMARY_PARSERS[TokenType.STRING](self, self._prev)
        return self._parse_placeholder()

    def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]:
        return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True)

    def _parse_number(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.NUMBER):
            return self.PRIMARY_PARSERS[TokenType.NUMBER](self, self._prev)
        return self._parse_placeholder()

    def _parse_identifier(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.IDENTIFIER):
            return self.expression(exp.Identifier, this=self._prev.text, quoted=True)
        return self._parse_placeholder()

    def _parse_var(
        self, any_token: bool = False, tokens: t.Optional[t.Collection[TokenType]] = None
    ) -> t.Optional[exp.Expression]:
        if (
            (any_token and self._advance_any())
            or self._match(TokenType.VAR)
            or (self._match_set(tokens) if tokens else False)
        ):
            return self.expression(exp.Var, this=self._prev.text)
        return self._parse_placeholder()

    def _advance_any(self) -> t.Optional[Token]:
        if self._curr and self._curr.token_type not in self.RESERVED_KEYWORDS:
            self._advance()
            return self._prev
        return None

    def _parse_var_or_string(self) -> t.Optional[exp.Expression]:
        return self._parse_var() or self._parse_string()

    def _parse_null(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.NULL):
            return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev)
        return self._parse_placeholder()

    def _parse_boolean(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.TRUE):
            return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev)
        if self._match(TokenType.FALSE):
            return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev)
        return self._parse_placeholder()

    def _parse_star(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.STAR):
            return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev)
        return self._parse_placeholder()

    def _parse_parameter(self) -> exp.Parameter:
        wrapped = self._match(TokenType.L_BRACE)
        this = self._parse_var() or self._parse_identifier() or self._parse_primary()
        self._match(TokenType.R_BRACE)
        return self.expression(exp.Parameter, this=this, wrapped=wrapped)

    def _parse_placeholder(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.PLACEHOLDER_PARSERS):
            placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self)
            if placeholder:
                return placeholder
            self._advance(-1)
        return None

    def _parse_except(self) -> t.Optional[t.List[exp.Expression]]:
        if not self._match(TokenType.EXCEPT):
            return None
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_column)
        return self._parse_csv(self._parse_column)

    def _parse_replace(self) -> t.Optional[t.List[exp.Expression]]:
        if not self._match(TokenType.REPLACE):
            return None
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_expression)
        return self._parse_expressions()

    def _parse_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA
    ) -> t.List[exp.Expression]:
        parse_result = parse_method()
        items = [parse_result] if parse_result is not None else []

        while self._match(sep):
            self._add_comments(parse_result)
            parse_result = parse_method()
            if parse_result is not None:
                items.append(parse_result)

        return items
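    # For example, called with a term-level parse_method and the FACTOR mapping,
    # _parse_tokens below folds the left-associative chain "a / b * c" into
    # Mul(this=Div(this=a, expression=b), expression=c).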
    def _parse_tokens(
        self, parse_method: t.Callable, expressions: t.Dict
    ) -> t.Optional[exp.Expression]:
        this = parse_method()

        while self._match_set(expressions):
            this = self.expression(
                expressions[self._prev.token_type],
                this=this,
                comments=self._prev_comments,
                expression=parse_method(),
            )

        return this

    def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]:
        return self._parse_wrapped_csv(self._parse_id_var, optional=optional)

    def _parse_wrapped_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False
    ) -> t.List[exp.Expression]:
        return self._parse_wrapped(
            lambda: self._parse_csv(parse_method, sep=sep), optional=optional
        )

    def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any:
        wrapped = self._match(TokenType.L_PAREN)
        if not wrapped and not optional:
            self.raise_error("Expecting (")
        parse_result = parse_method()
        if wrapped:
            self._match_r_paren()
        return parse_result

    def _parse_expressions(self) -> t.List[exp.Expression]:
        return self._parse_csv(self._parse_expression)

    def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]:
        return self._parse_select() or self._parse_set_operations(
            self._parse_expression() if alias else self._parse_conjunction()
        )

    def _parse_ddl_select(self) -> t.Optional[exp.Expression]:
        return self._parse_query_modifiers(
            self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False))
        )

    def _parse_transaction(self) -> exp.Transaction | exp.Command:
        this = None
        if self._match_texts(self.TRANSACTION_KIND):
            this = self._prev.text

        self._match_texts({"TRANSACTION", "WORK"})

        modes = []
        while True:
            mode = []
            while self._match(TokenType.VAR):
                mode.append(self._prev.text)

            if mode:
                modes.append(" ".join(mode))
            if not self._match(TokenType.COMMA):
                break

        return self.expression(exp.Transaction, this=this, modes=modes)

    def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback:
        chain = None
        savepoint = None
        is_rollback = self._prev.token_type == TokenType.ROLLBACK

        self._match_texts({"TRANSACTION", "WORK"})

        if self._match_text_seq("TO"):
            self._match_text_seq("SAVEPOINT")
            savepoint = self._parse_id_var()

        if self._match(TokenType.AND):
            chain = not self._match_text_seq("NO")
            self._match_text_seq("CHAIN")

        if is_rollback:
            return self.expression(exp.Rollback, savepoint=savepoint)

        return self.expression(exp.Commit, chain=chain)

    def _parse_add_column(self) -> t.Optional[exp.Expression]:
        if not self._match_text_seq("ADD"):
            return None

        self._match(TokenType.COLUMN)
        exists_column = self._parse_exists(not_=True)
        expression = self._parse_field_def()

        if expression:
            expression.set("exists", exists_column)

            # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns
            if self._match_texts(("FIRST", "AFTER")):
                position = self._prev.text
                column_position = self.expression(
                    exp.ColumnPosition, this=self._parse_column(), position=position
                )
                expression.set("position", column_position)

        return expression
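    # For example, "ADD COLUMN IF NOT EXISTS c INT AFTER b" yields a ColumnDef for c
    # with "exists" set and an exp.ColumnPosition(position='AFTER') pointing at b.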
    def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]:
        drop = self._match(TokenType.DROP) and self._parse_drop()
        if drop and not isinstance(drop, exp.Command):
            drop.set("kind", drop.args.get("kind", "COLUMN"))
        return drop

    # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html
    def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition:
        return self.expression(
            exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists
        )

    def _parse_add_constraint(self) -> exp.AddConstraint:
        this = None
        kind = self._prev.token_type

        if kind == TokenType.CONSTRAINT:
            this = self._parse_id_var()

            if self._match_text_seq("CHECK"):
                expression = self._parse_wrapped(self._parse_conjunction)
                enforced = self._match_text_seq("ENFORCED")

                return self.expression(
                    exp.AddConstraint, this=this, expression=expression, enforced=enforced
                )

        if kind == TokenType.FOREIGN_KEY or self._match(TokenType.FOREIGN_KEY):
            expression = self._parse_foreign_key()
        elif kind == TokenType.PRIMARY_KEY or self._match(TokenType.PRIMARY_KEY):
            expression = self._parse_primary_key()
        else:
            expression = None

        return self.expression(exp.AddConstraint, this=this, expression=expression)

    def _parse_alter_table_add(self) -> t.List[exp.Expression]:
        index = self._index - 1

        if self._match_set(self.ADD_CONSTRAINT_TOKENS):
            return self._parse_csv(self._parse_add_constraint)

        self._retreat(index)
        return self._parse_csv(self._parse_add_column)

    def _parse_alter_table_alter(self) -> exp.AlterColumn:
        self._match(TokenType.COLUMN)
        column = self._parse_field(any_token=True)

        if self._match_pair(TokenType.DROP, TokenType.DEFAULT):
            return self.expression(exp.AlterColumn, this=column, drop=True)
        if self._match_pair(TokenType.SET, TokenType.DEFAULT):
            return self.expression(exp.AlterColumn, this=column, default=self._parse_conjunction())

        self._match_text_seq("SET", "DATA")
        return self.expression(
            exp.AlterColumn,
            this=column,
            dtype=self._match_text_seq("TYPE") and self._parse_types(),
            collate=self._match(TokenType.COLLATE) and self._parse_term(),
            using=self._match(TokenType.USING) and self._parse_conjunction(),
        )

    def _parse_alter_table_drop(self) -> t.List[exp.Expression]:
        index = self._index - 1

        partition_exists = self._parse_exists()
        if self._match(TokenType.PARTITION, advance=False):
            return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists))

        self._retreat(index)
        return self._parse_csv(self._parse_drop_column)

    def _parse_alter_table_rename(self) -> exp.RenameTable:
        self._match_text_seq("TO")
        return self.expression(exp.RenameTable, this=self._parse_table(schema=True))
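    # For example, "ALTER TABLE t RENAME TO u" is dispatched through ALTER_PARSERS
    # (e.g. the "RENAME" key) to _parse_alter_table_rename above, producing
    # AlterTable(this=t, actions=[RenameTable(u)]) in _parse_alter below.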
    def _parse_alter(self) -> exp.AlterTable | exp.Command:
        start = self._prev

        if not self._match(TokenType.TABLE):
            return self._parse_as_command(start)

        exists = self._parse_exists()
        this = self._parse_table(schema=True)

        if self._next:
            self._advance()

        parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None
        if parser:
            actions = ensure_list(parser(self))

            if not self._curr:
                return self.expression(
                    exp.AlterTable,
                    this=this,
                    exists=exists,
                    actions=actions,
                )
        return self._parse_as_command(start)

    def _parse_merge(self) -> exp.Merge:
        self._match(TokenType.INTO)
        target = self._parse_table()

        if target and self._match(TokenType.ALIAS, advance=False):
            target.set("alias", self._parse_table_alias())

        self._match(TokenType.USING)
        using = self._parse_table()

        self._match(TokenType.ON)
        on = self._parse_conjunction()

        whens = []
        while self._match(TokenType.WHEN):
            matched = not self._match(TokenType.NOT)
            self._match_text_seq("MATCHED")
            source = (
                False
                if self._match_text_seq("BY", "TARGET")
                else self._match_text_seq("BY", "SOURCE")
            )
            condition = self._parse_conjunction() if self._match(TokenType.AND) else None

            self._match(TokenType.THEN)

            if self._match(TokenType.INSERT):
                _this = self._parse_star()
                if _this:
                    then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=_this)
                else:
                    then = self.expression(
                        exp.Insert,
                        this=self._parse_value(),
                        expression=self._match(TokenType.VALUES) and self._parse_value(),
                    )
            elif self._match(TokenType.UPDATE):
                expressions = self._parse_star()
                if expressions:
                    then = self.expression(exp.Update, expressions=expressions)
                else:
                    then = self.expression(
                        exp.Update,
                        expressions=self._match(TokenType.SET)
                        and self._parse_csv(self._parse_equality),
                    )
            elif self._match(TokenType.DELETE):
                then = self.expression(exp.Var, this=self._prev.text)
            else:
                then = None

            whens.append(
                self.expression(
                    exp.When,
                    matched=matched,
                    source=source,
                    condition=condition,
                    then=then,
                )
            )

        return self.expression(
            exp.Merge,
            this=target,
            using=using,
            on=on,
            expressions=whens,
        )

    def _parse_show(self) -> t.Optional[exp.Expression]:
        parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE)
        if parser:
            return parser(self)
        self._advance()
        return self.expression(exp.Show, this=self._prev.text.upper())

    def _parse_set_item_assignment(
        self, kind: t.Optional[str] = None
    ) -> t.Optional[exp.Expression]:
        index = self._index

        if kind in {"GLOBAL", "SESSION"} and self._match_text_seq("TRANSACTION"):
            return self._parse_set_transaction(global_=kind == "GLOBAL")

        left = self._parse_primary() or self._parse_id_var()

        if not self._match_texts(("=", "TO")):
            self._retreat(index)
            return None

        right = self._parse_statement() or self._parse_id_var()
        this = self.expression(exp.EQ, this=left, expression=right)

        return self.expression(exp.SetItem, this=this, kind=kind)

    def _parse_set_transaction(self, global_: bool = False) -> exp.Expression:
        self._match_text_seq("TRANSACTION")
        characteristics = self._parse_csv(
            lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS)
        )
        return self.expression(
            exp.SetItem,
            expressions=characteristics,
            kind="TRANSACTION",
            **{"global": global_},  # type: ignore
        )

    def _parse_set_item(self) -> t.Optional[exp.Expression]:
        parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE)
        return parser(self) if parser else self._parse_set_item_assignment(kind=None)
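    # For example, "SET x = 1" (or "SET x TO 1") reaches _parse_set_item_assignment and
    # yields an exp.SetItem wrapping exp.EQ; if any tokens remain unconsumed,
    # _parse_set below retreats and returns the whole statement as a raw exp.Command.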
    def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command:
        index = self._index
        set_ = self.expression(
            exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag
        )

        if self._curr:
            self._retreat(index)
            return self._parse_as_command(self._prev)

        return set_

    def _parse_var_from_options(self, options: t.Collection[str]) -> t.Optional[exp.Var]:
        for option in options:
            if self._match_text_seq(*option.split(" ")):
                return exp.var(option)
        return None

    def _parse_as_command(self, start: Token) -> exp.Command:
        while self._curr:
            self._advance()
        text = self._find_sql(start, self._prev)
        size = len(start.text)
        return exp.Command(this=text[:size], expression=text[size:])

    def _parse_dict_property(self, this: str) -> exp.DictProperty:
        settings = []

        self._match_l_paren()
        kind = self._parse_id_var()

        if self._match(TokenType.L_PAREN):
            while True:
                key = self._parse_id_var()
                value = self._parse_primary()

                if not key and value is None:
                    break
                settings.append(self.expression(exp.DictSubProperty, this=key, value=value))
            self._match(TokenType.R_PAREN)

        self._match_r_paren()

        return self.expression(
            exp.DictProperty,
            this=this,
            kind=kind.this if kind else None,
            settings=settings,
        )

    def _parse_dict_range(self, this: str) -> exp.DictRange:
        self._match_l_paren()
        has_min = self._match_text_seq("MIN")
        if has_min:
            min = self._parse_var() or self._parse_primary()
            self._match_text_seq("MAX")
            max = self._parse_var() or self._parse_primary()
        else:
            max = self._parse_var() or self._parse_primary()
            min = exp.Literal.number(0)
        self._match_r_paren()
        return self.expression(exp.DictRange, this=this, min=min, max=max)

    def _parse_comprehension(self, this: exp.Expression) -> exp.Comprehension:
        expression = self._parse_column()
        self._match(TokenType.IN)
        iterator = self._parse_column()
        condition = self._parse_conjunction() if self._match_text_seq("IF") else None
        return self.expression(
            exp.Comprehension,
            this=this,
            expression=expression,
            iterator=iterator,
            condition=condition,
        )

    def _find_parser(
        self, parsers: t.Dict[str, t.Callable], trie: t.Dict
    ) -> t.Optional[t.Callable]:
        if not self._curr:
            return None

        index = self._index
        this = []
        while True:
            # The current token might be multiple words
            curr = self._curr.text.upper()
            key = curr.split(" ")
            this.append(curr)

            self._advance()
            result, trie = in_trie(trie, key)
            if result == TrieResult.FAILED:
                break

            if result == TrieResult.EXISTS:
                subparser = parsers[" ".join(this)]
                return subparser

        self._retreat(index)
        return None

    def _match(self, token_type, advance=True, expression=None):
        if not self._curr:
            return None

        if self._curr.token_type == token_type:
            if advance:
                self._advance()
            self._add_comments(expression)
            return True

        return None
    def _match_set(self, types, advance=True):
        if not self._curr:
            return None

        if self._curr.token_type in types:
            if advance:
                self._advance()
            return True

        return None

    def _match_pair(self, token_type_a, token_type_b, advance=True):
        if not self._curr or not self._next:
            return None

        if self._curr.token_type == token_type_a and self._next.token_type == token_type_b:
            if advance:
                self._advance(2)
            return True

        return None

    def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None:
        if not self._match(TokenType.L_PAREN, expression=expression):
            self.raise_error("Expecting (")

    def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None:
        if not self._match(TokenType.R_PAREN, expression=expression):
            self.raise_error("Expecting )")

    def _match_texts(self, texts, advance=True):
        if self._curr and self._curr.text.upper() in texts:
            if advance:
                self._advance()
            return True
        return False

    def _match_text_seq(self, *texts, advance=True):
        index = self._index
        for text in texts:
            if self._curr and self._curr.text.upper() == text:
                self._advance()
            else:
                self._retreat(index)
                return False

        if not advance:
            self._retreat(index)

        return True

    @t.overload
    def _replace_columns_with_dots(self, this: exp.Expression) -> exp.Expression:
        ...

    @t.overload
    def _replace_columns_with_dots(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        ...

    def _replace_columns_with_dots(self, this):
        if isinstance(this, exp.Dot):
            exp.replace_children(this, self._replace_columns_with_dots)
        elif isinstance(this, exp.Column):
            exp.replace_children(this, self._replace_columns_with_dots)
            table = this.args.get("table")
            this = (
                self.expression(exp.Dot, this=table, expression=this.this) if table else this.this
            )

        return this

    def _replace_lambda(
        self, node: t.Optional[exp.Expression], lambda_variables: t.Set[str]
    ) -> t.Optional[exp.Expression]:
        if not node:
            return node

        for column in node.find_all(exp.Column):
            if column.parts[0].name in lambda_variables:
                dot_or_id = column.to_dot() if column.table else column.this
                parent = column.parent

                while isinstance(parent, exp.Dot):
                    if not isinstance(parent.parent, exp.Dot):
                        parent.replace(dot_or_id)
                        break
                    parent = parent.parent
                else:
                    if column is node:
                        node = dot_or_id
                    else:
                        column.replace(dot_or_id)
        return node
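# A minimal usage sketch (illustrative, not part of this module): the Parser is
# normally driven through sqlglot's public API, which tokenizes and parses in one
# step and returns the syntax tree built by the methods above:
#
#   import sqlglot
#   from sqlglot import exp
#
#   tree = sqlglot.parse_one("SELECT CAST(created_at AS DATE) FROM users")
#   assert isinstance(tree.find(exp.Cast), exp.Cast)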
self._parse_introducer(token), 573 TokenType.NATIONAL_STRING: lambda self, token: self.expression( 574 exp.National, this=token.text 575 ), 576 TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text), 577 TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(), 578 } 579 580 PLACEHOLDER_PARSERS = { 581 TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder), 582 TokenType.PARAMETER: lambda self: self._parse_parameter(), 583 TokenType.COLON: lambda self: self.expression(exp.Placeholder, this=self._prev.text) 584 if self._match(TokenType.NUMBER) or self._match_set(self.ID_VAR_TOKENS) 585 else None, 586 } 587 588 RANGE_PARSERS = { 589 TokenType.BETWEEN: lambda self, this: self._parse_between(this), 590 TokenType.GLOB: binary_range_parser(exp.Glob), 591 TokenType.ILIKE: binary_range_parser(exp.ILike), 592 TokenType.IN: lambda self, this: self._parse_in(this), 593 TokenType.IRLIKE: binary_range_parser(exp.RegexpILike), 594 TokenType.IS: lambda self, this: self._parse_is(this), 595 TokenType.LIKE: binary_range_parser(exp.Like), 596 TokenType.OVERLAPS: binary_range_parser(exp.Overlaps), 597 TokenType.RLIKE: binary_range_parser(exp.RegexpLike), 598 TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo), 599 TokenType.FOR: lambda self, this: self._parse_comprehension(this), 600 } 601 602 PROPERTY_PARSERS: t.Dict[str, t.Callable] = { 603 "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty), 604 "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty), 605 "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(), 606 "CHARACTER SET": lambda self: self._parse_character_set(), 607 "CHECKSUM": lambda self: self._parse_checksum(), 608 "CLUSTER BY": lambda self: self._parse_cluster(), 609 "CLUSTERED": lambda self: self._parse_clustered_by(), 610 "COLLATE": lambda self: self._parse_property_assignment(exp.CollateProperty), 611 "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty), 612 "COPY": lambda self: self._parse_copy_property(), 613 "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs), 614 "DEFINER": lambda self: self._parse_definer(), 615 "DETERMINISTIC": lambda self: self.expression( 616 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 617 ), 618 "DISTKEY": lambda self: self._parse_distkey(), 619 "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty), 620 "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty), 621 "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty), 622 "EXTERNAL": lambda self: self.expression(exp.ExternalProperty), 623 "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs), 624 "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 625 "FREESPACE": lambda self: self._parse_freespace(), 626 "HEAP": lambda self: self.expression(exp.HeapProperty), 627 "IMMUTABLE": lambda self: self.expression( 628 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 629 ), 630 "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs), 631 "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty), 632 "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"), 633 "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"), 634 "LIKE": lambda self: self._parse_create_like(), 635 "LOCATION": lambda self: 
self._parse_property_assignment(exp.LocationProperty), 636 "LOCK": lambda self: self._parse_locking(), 637 "LOCKING": lambda self: self._parse_locking(), 638 "LOG": lambda self, **kwargs: self._parse_log(**kwargs), 639 "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty), 640 "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs), 641 "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True), 642 "NO": lambda self: self._parse_no_property(), 643 "ON": lambda self: self._parse_on_property(), 644 "ORDER BY": lambda self: self._parse_order(skip_order_token=True), 645 "PARTITION BY": lambda self: self._parse_partitioned_by(), 646 "PARTITIONED BY": lambda self: self._parse_partitioned_by(), 647 "PARTITIONED_BY": lambda self: self._parse_partitioned_by(), 648 "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True), 649 "RANGE": lambda self: self._parse_dict_range(this="RANGE"), 650 "RETURNS": lambda self: self._parse_returns(), 651 "ROW": lambda self: self._parse_row(), 652 "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty), 653 "SET": lambda self: self.expression(exp.SetProperty, multi=False), 654 "SETTINGS": lambda self: self.expression( 655 exp.SettingsProperty, expressions=self._parse_csv(self._parse_set_item) 656 ), 657 "SORTKEY": lambda self: self._parse_sortkey(), 658 "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"), 659 "STABLE": lambda self: self.expression( 660 exp.StabilityProperty, this=exp.Literal.string("STABLE") 661 ), 662 "STORED": lambda self: self._parse_stored(), 663 "TBLPROPERTIES": lambda self: self._parse_wrapped_csv(self._parse_property), 664 "TEMP": lambda self: self.expression(exp.TemporaryProperty), 665 "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty), 666 "TO": lambda self: self._parse_to_table(), 667 "TRANSIENT": lambda self: self.expression(exp.TransientProperty), 668 "TTL": lambda self: self._parse_ttl(), 669 "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 670 "VOLATILE": lambda self: self._parse_volatile_property(), 671 "WITH": lambda self: self._parse_with_property(), 672 } 673 674 CONSTRAINT_PARSERS = { 675 "AUTOINCREMENT": lambda self: self._parse_auto_increment(), 676 "AUTO_INCREMENT": lambda self: self._parse_auto_increment(), 677 "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False), 678 "CHARACTER SET": lambda self: self.expression( 679 exp.CharacterSetColumnConstraint, this=self._parse_var_or_string() 680 ), 681 "CHECK": lambda self: self.expression( 682 exp.CheckColumnConstraint, this=self._parse_wrapped(self._parse_conjunction) 683 ), 684 "COLLATE": lambda self: self.expression( 685 exp.CollateColumnConstraint, this=self._parse_var() 686 ), 687 "COMMENT": lambda self: self.expression( 688 exp.CommentColumnConstraint, this=self._parse_string() 689 ), 690 "COMPRESS": lambda self: self._parse_compress(), 691 "CLUSTERED": lambda self: self.expression( 692 exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 693 ), 694 "NONCLUSTERED": lambda self: self.expression( 695 exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 696 ), 697 "DEFAULT": lambda self: self.expression( 698 exp.DefaultColumnConstraint, this=self._parse_bitwise() 699 ), 700 "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()), 701 "FOREIGN KEY": lambda self: self._parse_foreign_key(), 702 "FORMAT": lambda 
self: self.expression( 703 exp.DateFormatColumnConstraint, this=self._parse_var_or_string() 704 ), 705 "GENERATED": lambda self: self._parse_generated_as_identity(), 706 "IDENTITY": lambda self: self._parse_auto_increment(), 707 "INLINE": lambda self: self._parse_inline(), 708 "LIKE": lambda self: self._parse_create_like(), 709 "NOT": lambda self: self._parse_not_constraint(), 710 "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True), 711 "ON": lambda self: ( 712 self._match(TokenType.UPDATE) 713 and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function()) 714 ) 715 or self.expression(exp.OnProperty, this=self._parse_id_var()), 716 "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()), 717 "PRIMARY KEY": lambda self: self._parse_primary_key(), 718 "REFERENCES": lambda self: self._parse_references(match=False), 719 "TITLE": lambda self: self.expression( 720 exp.TitleColumnConstraint, this=self._parse_var_or_string() 721 ), 722 "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]), 723 "UNIQUE": lambda self: self._parse_unique(), 724 "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint), 725 "WITH": lambda self: self.expression( 726 exp.Properties, expressions=self._parse_wrapped_csv(self._parse_property) 727 ), 728 } 729 730 ALTER_PARSERS = { 731 "ADD": lambda self: self._parse_alter_table_add(), 732 "ALTER": lambda self: self._parse_alter_table_alter(), 733 "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()), 734 "DROP": lambda self: self._parse_alter_table_drop(), 735 "RENAME": lambda self: self._parse_alter_table_rename(), 736 } 737 738 SCHEMA_UNNAMED_CONSTRAINTS = {"CHECK", "FOREIGN KEY", "LIKE", "PRIMARY KEY", "UNIQUE"} 739 740 NO_PAREN_FUNCTION_PARSERS = { 741 "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()), 742 "CASE": lambda self: self._parse_case(), 743 "IF": lambda self: self._parse_if(), 744 "NEXT": lambda self: self._parse_next_value_for(), 745 } 746 747 INVALID_FUNC_NAME_TOKENS = { 748 TokenType.IDENTIFIER, 749 TokenType.STRING, 750 } 751 752 FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"} 753 754 FUNCTION_PARSERS = { 755 "ANY_VALUE": lambda self: self._parse_any_value(), 756 "CAST": lambda self: self._parse_cast(self.STRICT_CAST), 757 "CONCAT": lambda self: self._parse_concat(), 758 "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST), 759 "DECODE": lambda self: self._parse_decode(), 760 "EXTRACT": lambda self: self._parse_extract(), 761 "JSON_OBJECT": lambda self: self._parse_json_object(), 762 "LOG": lambda self: self._parse_logarithm(), 763 "MATCH": lambda self: self._parse_match_against(), 764 "OPENJSON": lambda self: self._parse_open_json(), 765 "POSITION": lambda self: self._parse_position(), 766 "SAFE_CAST": lambda self: self._parse_cast(False), 767 "STRING_AGG": lambda self: self._parse_string_agg(), 768 "SUBSTRING": lambda self: self._parse_substring(), 769 "TRIM": lambda self: self._parse_trim(), 770 "TRY_CAST": lambda self: self._parse_cast(False), 771 "TRY_CONVERT": lambda self: self._parse_convert(False), 772 } 773 774 QUERY_MODIFIER_PARSERS = { 775 TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()), 776 TokenType.WHERE: lambda self: ("where", self._parse_where()), 777 TokenType.GROUP_BY: lambda self: ("group", self._parse_group()), 778 TokenType.HAVING: lambda self: ("having", self._parse_having()), 779 TokenType.QUALIFY: lambda self: 
("qualify", self._parse_qualify()), 780 TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()), 781 TokenType.ORDER_BY: lambda self: ("order", self._parse_order()), 782 TokenType.LIMIT: lambda self: ("limit", self._parse_limit()), 783 TokenType.FETCH: lambda self: ("limit", self._parse_limit()), 784 TokenType.OFFSET: lambda self: ("offset", self._parse_offset()), 785 TokenType.FOR: lambda self: ("locks", self._parse_locks()), 786 TokenType.LOCK: lambda self: ("locks", self._parse_locks()), 787 TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 788 TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 789 TokenType.CLUSTER_BY: lambda self: ( 790 "cluster", 791 self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 792 ), 793 TokenType.DISTRIBUTE_BY: lambda self: ( 794 "distribute", 795 self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY), 796 ), 797 TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)), 798 TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)), 799 TokenType.START_WITH: lambda self: ("connect", self._parse_connect()), 800 } 801 802 SET_PARSERS = { 803 "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"), 804 "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"), 805 "SESSION": lambda self: self._parse_set_item_assignment("SESSION"), 806 "TRANSACTION": lambda self: self._parse_set_transaction(), 807 } 808 809 SHOW_PARSERS: t.Dict[str, t.Callable] = {} 810 811 TYPE_LITERAL_PARSERS: t.Dict[exp.DataType.Type, t.Callable] = {} 812 813 MODIFIABLES = (exp.Subquery, exp.Subqueryable, exp.Table) 814 815 DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN} 816 817 PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE} 818 819 TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"} 820 TRANSACTION_CHARACTERISTICS = { 821 "ISOLATION LEVEL REPEATABLE READ", 822 "ISOLATION LEVEL READ COMMITTED", 823 "ISOLATION LEVEL READ UNCOMMITTED", 824 "ISOLATION LEVEL SERIALIZABLE", 825 "READ WRITE", 826 "READ ONLY", 827 } 828 829 INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"} 830 831 CLONE_KINDS = {"TIMESTAMP", "OFFSET", "STATEMENT"} 832 833 TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE} 834 835 WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS} 836 WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER} 837 WINDOW_SIDES = {"FOLLOWING", "PRECEDING"} 838 839 ADD_CONSTRAINT_TOKENS = {TokenType.CONSTRAINT, TokenType.PRIMARY_KEY, TokenType.FOREIGN_KEY} 840 841 DISTINCT_TOKENS = {TokenType.DISTINCT} 842 843 STRICT_CAST = True 844 845 # A NULL arg in CONCAT yields NULL by default 846 CONCAT_NULL_OUTPUTS_STRING = False 847 848 PREFIXED_PIVOT_COLUMNS = False 849 IDENTIFY_PIVOT_STRINGS = False 850 851 LOG_BASE_FIRST = True 852 LOG_DEFAULTS_TO_LN = False 853 854 SUPPORTS_USER_DEFINED_TYPES = True 855 856 __slots__ = ( 857 "error_level", 858 "error_message_context", 859 "max_errors", 860 "sql", 861 "errors", 862 "_tokens", 863 "_index", 864 "_curr", 865 "_next", 866 "_prev", 867 "_prev_comments", 868 "_tokenizer", 869 ) 870 871 # Autofilled 872 TOKENIZER_CLASS: t.Type[Tokenizer] = Tokenizer 873 INDEX_OFFSET: int = 0 874 UNNEST_COLUMN_ONLY: bool = False 875 ALIAS_POST_TABLESAMPLE: bool = False 876 STRICT_STRING_CONCAT = False 877 NORMALIZE_FUNCTIONS = "upper" 878 NULL_ORDERING: str = "nulls_are_small" 879 SHOW_TRIE: t.Dict = {} 880 
SET_TRIE: t.Dict = {} 881 FORMAT_MAPPING: t.Dict[str, str] = {} 882 FORMAT_TRIE: t.Dict = {} 883 TIME_MAPPING: t.Dict[str, str] = {} 884 TIME_TRIE: t.Dict = {} 885 886 def __init__( 887 self, 888 error_level: t.Optional[ErrorLevel] = None, 889 error_message_context: int = 100, 890 max_errors: int = 3, 891 ): 892 self.error_level = error_level or ErrorLevel.IMMEDIATE 893 self.error_message_context = error_message_context 894 self.max_errors = max_errors 895 self._tokenizer = self.TOKENIZER_CLASS() 896 self.reset() 897 898 def reset(self): 899 self.sql = "" 900 self.errors = [] 901 self._tokens = [] 902 self._index = 0 903 self._curr = None 904 self._next = None 905 self._prev = None 906 self._prev_comments = None 907 908 def parse( 909 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 910 ) -> t.List[t.Optional[exp.Expression]]: 911 """ 912 Parses a list of tokens and returns a list of syntax trees, one tree 913 per parsed SQL statement. 914 915 Args: 916 raw_tokens: The list of tokens. 917 sql: The original SQL string, used to produce helpful debug messages. 918 919 Returns: 920 The list of the produced syntax trees. 921 """ 922 return self._parse( 923 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 924 ) 925 926 def parse_into( 927 self, 928 expression_types: exp.IntoType, 929 raw_tokens: t.List[Token], 930 sql: t.Optional[str] = None, 931 ) -> t.List[t.Optional[exp.Expression]]: 932 """ 933 Parses a list of tokens into a given Expression type. If a collection of Expression 934 types is given instead, this method will try to parse the token list into each one 935 of them, stopping at the first for which the parsing succeeds. 936 937 Args: 938 expression_types: The expression type(s) to try and parse the token list into. 939 raw_tokens: The list of tokens. 940 sql: The original SQL string, used to produce helpful debug messages. 941 942 Returns: 943 The list of syntax trees, parsed into the target Expression type.
944 """ 945 errors = [] 946 for expression_type in ensure_list(expression_types): 947 parser = self.EXPRESSION_PARSERS.get(expression_type) 948 if not parser: 949 raise TypeError(f"No parser registered for {expression_type}") 950 951 try: 952 return self._parse(parser, raw_tokens, sql) 953 except ParseError as e: 954 e.errors[0]["into_expression"] = expression_type 955 errors.append(e) 956 957 raise ParseError( 958 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 959 errors=merge_errors(errors), 960 ) from errors[-1] 961 962 def _parse( 963 self, 964 parse_method: t.Callable[[Parser], t.Optional[exp.Expression]], 965 raw_tokens: t.List[Token], 966 sql: t.Optional[str] = None, 967 ) -> t.List[t.Optional[exp.Expression]]: 968 self.reset() 969 self.sql = sql or "" 970 971 total = len(raw_tokens) 972 chunks: t.List[t.List[Token]] = [[]] 973 974 for i, token in enumerate(raw_tokens): 975 if token.token_type == TokenType.SEMICOLON: 976 if i < total - 1: 977 chunks.append([]) 978 else: 979 chunks[-1].append(token) 980 981 expressions = [] 982 983 for tokens in chunks: 984 self._index = -1 985 self._tokens = tokens 986 self._advance() 987 988 expressions.append(parse_method(self)) 989 990 if self._index < len(self._tokens): 991 self.raise_error("Invalid expression / Unexpected token") 992 993 self.check_errors() 994 995 return expressions 996 997 def check_errors(self) -> None: 998 """Logs or raises any found errors, depending on the chosen error level setting.""" 999 if self.error_level == ErrorLevel.WARN: 1000 for error in self.errors: 1001 logger.error(str(error)) 1002 elif self.error_level == ErrorLevel.RAISE and self.errors: 1003 raise ParseError( 1004 concat_messages(self.errors, self.max_errors), 1005 errors=merge_errors(self.errors), 1006 ) 1007 1008 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 1009 """ 1010 Appends an error in the list of recorded errors or raises it, depending on the chosen 1011 error level setting. 1012 """ 1013 token = token or self._curr or self._prev or Token.string("") 1014 start = token.start 1015 end = token.end + 1 1016 start_context = self.sql[max(start - self.error_message_context, 0) : start] 1017 highlight = self.sql[start:end] 1018 end_context = self.sql[end : end + self.error_message_context] 1019 1020 error = ParseError.new( 1021 f"{message}. Line {token.line}, Col: {token.col}.\n" 1022 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 1023 description=message, 1024 line=token.line, 1025 col=token.col, 1026 start_context=start_context, 1027 highlight=highlight, 1028 end_context=end_context, 1029 ) 1030 1031 if self.error_level == ErrorLevel.IMMEDIATE: 1032 raise error 1033 1034 self.errors.append(error) 1035 1036 def expression( 1037 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 1038 ) -> E: 1039 """ 1040 Creates a new, validated Expression. 1041 1042 Args: 1043 exp_class: The expression class to instantiate. 1044 comments: An optional list of comments to attach to the expression. 1045 kwargs: The arguments to set for the expression along with their respective values. 1046 1047 Returns: 1048 The target expression. 
1049 """ 1050 instance = exp_class(**kwargs) 1051 instance.add_comments(comments) if comments else self._add_comments(instance) 1052 return self.validate_expression(instance) 1053 1054 def _add_comments(self, expression: t.Optional[exp.Expression]) -> None: 1055 if expression and self._prev_comments: 1056 expression.add_comments(self._prev_comments) 1057 self._prev_comments = None 1058 1059 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1060 """ 1061 Validates an Expression, making sure that all its mandatory arguments are set. 1062 1063 Args: 1064 expression: The expression to validate. 1065 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1066 1067 Returns: 1068 The validated expression. 1069 """ 1070 if self.error_level != ErrorLevel.IGNORE: 1071 for error_message in expression.error_messages(args): 1072 self.raise_error(error_message) 1073 1074 return expression 1075 1076 def _find_sql(self, start: Token, end: Token) -> str: 1077 return self.sql[start.start : end.end + 1] 1078 1079 def _advance(self, times: int = 1) -> None: 1080 self._index += times 1081 self._curr = seq_get(self._tokens, self._index) 1082 self._next = seq_get(self._tokens, self._index + 1) 1083 1084 if self._index > 0: 1085 self._prev = self._tokens[self._index - 1] 1086 self._prev_comments = self._prev.comments 1087 else: 1088 self._prev = None 1089 self._prev_comments = None 1090 1091 def _retreat(self, index: int) -> None: 1092 if index != self._index: 1093 self._advance(index - self._index) 1094 1095 def _parse_command(self) -> exp.Command: 1096 return self.expression(exp.Command, this=self._prev.text, expression=self._parse_string()) 1097 1098 def _parse_comment(self, allow_exists: bool = True) -> exp.Expression: 1099 start = self._prev 1100 exists = self._parse_exists() if allow_exists else None 1101 1102 self._match(TokenType.ON) 1103 1104 kind = self._match_set(self.CREATABLES) and self._prev 1105 if not kind: 1106 return self._parse_as_command(start) 1107 1108 if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1109 this = self._parse_user_defined_function(kind=kind.token_type) 1110 elif kind.token_type == TokenType.TABLE: 1111 this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS) 1112 elif kind.token_type == TokenType.COLUMN: 1113 this = self._parse_column() 1114 else: 1115 this = self._parse_id_var() 1116 1117 self._match(TokenType.IS) 1118 1119 return self.expression( 1120 exp.Comment, this=this, kind=kind.text, expression=self._parse_string(), exists=exists 1121 ) 1122 1123 def _parse_to_table( 1124 self, 1125 ) -> exp.ToTableProperty: 1126 table = self._parse_table_parts(schema=True) 1127 return self.expression(exp.ToTableProperty, this=table) 1128 1129 # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl 1130 def _parse_ttl(self) -> exp.Expression: 1131 def _parse_ttl_action() -> t.Optional[exp.Expression]: 1132 this = self._parse_bitwise() 1133 1134 if self._match_text_seq("DELETE"): 1135 return self.expression(exp.MergeTreeTTLAction, this=this, delete=True) 1136 if self._match_text_seq("RECOMPRESS"): 1137 return self.expression( 1138 exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise() 1139 ) 1140 if self._match_text_seq("TO", "DISK"): 1141 return self.expression( 1142 exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string() 1143 ) 1144 if self._match_text_seq("TO", "VOLUME"): 1145 return self.expression( 1146 
exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string() 1147 ) 1148 1149 return this 1150 1151 expressions = self._parse_csv(_parse_ttl_action) 1152 where = self._parse_where() 1153 group = self._parse_group() 1154 1155 aggregates = None 1156 if group and self._match(TokenType.SET): 1157 aggregates = self._parse_csv(self._parse_set_item) 1158 1159 return self.expression( 1160 exp.MergeTreeTTL, 1161 expressions=expressions, 1162 where=where, 1163 group=group, 1164 aggregates=aggregates, 1165 ) 1166 1167 def _parse_statement(self) -> t.Optional[exp.Expression]: 1168 if self._curr is None: 1169 return None 1170 1171 if self._match_set(self.STATEMENT_PARSERS): 1172 return self.STATEMENT_PARSERS[self._prev.token_type](self) 1173 1174 if self._match_set(Tokenizer.COMMANDS): 1175 return self._parse_command() 1176 1177 expression = self._parse_expression() 1178 expression = self._parse_set_operations(expression) if expression else self._parse_select() 1179 return self._parse_query_modifiers(expression) 1180 1181 def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command: 1182 start = self._prev 1183 temporary = self._match(TokenType.TEMPORARY) 1184 materialized = self._match_text_seq("MATERIALIZED") 1185 1186 kind = self._match_set(self.CREATABLES) and self._prev.text 1187 if not kind: 1188 return self._parse_as_command(start) 1189 1190 return self.expression( 1191 exp.Drop, 1192 comments=start.comments, 1193 exists=exists or self._parse_exists(), 1194 this=self._parse_table(schema=True), 1195 kind=kind, 1196 temporary=temporary, 1197 materialized=materialized, 1198 cascade=self._match_text_seq("CASCADE"), 1199 constraints=self._match_text_seq("CONSTRAINTS"), 1200 purge=self._match_text_seq("PURGE"), 1201 ) 1202 1203 def _parse_exists(self, not_: bool = False) -> t.Optional[bool]: 1204 return ( 1205 self._match_text_seq("IF") 1206 and (not not_ or self._match(TokenType.NOT)) 1207 and self._match(TokenType.EXISTS) 1208 ) 1209 1210 def _parse_create(self) -> exp.Create | exp.Command: 1211 # Note: this can't be None because we've matched a statement parser 1212 start = self._prev 1213 comments = self._prev_comments 1214 1215 replace = start.text.upper() == "REPLACE" or self._match_pair( 1216 TokenType.OR, TokenType.REPLACE 1217 ) 1218 unique = self._match(TokenType.UNIQUE) 1219 1220 if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False): 1221 self._advance() 1222 1223 properties = None 1224 create_token = self._match_set(self.CREATABLES) and self._prev 1225 1226 if not create_token: 1227 # exp.Properties.Location.POST_CREATE 1228 properties = self._parse_properties() 1229 create_token = self._match_set(self.CREATABLES) and self._prev 1230 1231 if not properties or not create_token: 1232 return self._parse_as_command(start) 1233 1234 exists = self._parse_exists(not_=True) 1235 this = None 1236 expression: t.Optional[exp.Expression] = None 1237 indexes = None 1238 no_schema_binding = None 1239 begin = None 1240 clone = None 1241 1242 def extend_props(temp_props: t.Optional[exp.Properties]) -> None: 1243 nonlocal properties 1244 if properties and temp_props: 1245 properties.expressions.extend(temp_props.expressions) 1246 elif temp_props: 1247 properties = temp_props 1248 1249 if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1250 this = self._parse_user_defined_function(kind=create_token.token_type) 1251 1252 # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature) 1253 extend_props(self._parse_properties()) 1254 1255 
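# For functions and procedures, an optional AS introduces the body: either a
# verbatim command, or a statement (optionally preceded by BEGIN and/or
# RETURN) that gets wrapped in exp.Return when RETURN was matched.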
self._match(TokenType.ALIAS) 1256 1257 if self._match(TokenType.COMMAND): 1258 expression = self._parse_as_command(self._prev) 1259 else: 1260 begin = self._match(TokenType.BEGIN) 1261 return_ = self._match_text_seq("RETURN") 1262 expression = self._parse_statement() 1263 1264 if return_: 1265 expression = self.expression(exp.Return, this=expression) 1266 elif create_token.token_type == TokenType.INDEX: 1267 this = self._parse_index(index=self._parse_id_var()) 1268 elif create_token.token_type in self.DB_CREATABLES: 1269 table_parts = self._parse_table_parts(schema=True) 1270 1271 # exp.Properties.Location.POST_NAME 1272 self._match(TokenType.COMMA) 1273 extend_props(self._parse_properties(before=True)) 1274 1275 this = self._parse_schema(this=table_parts) 1276 1277 # exp.Properties.Location.POST_SCHEMA and POST_WITH 1278 extend_props(self._parse_properties()) 1279 1280 self._match(TokenType.ALIAS) 1281 if not self._match_set(self.DDL_SELECT_TOKENS, advance=False): 1282 # exp.Properties.Location.POST_ALIAS 1283 extend_props(self._parse_properties()) 1284 1285 expression = self._parse_ddl_select() 1286 1287 if create_token.token_type == TokenType.TABLE: 1288 # exp.Properties.Location.POST_EXPRESSION 1289 extend_props(self._parse_properties()) 1290 1291 indexes = [] 1292 while True: 1293 index = self._parse_index() 1294 1295 # exp.Properties.Location.POST_INDEX 1296 extend_props(self._parse_properties()) 1297 1298 if not index: 1299 break 1300 else: 1301 self._match(TokenType.COMMA) 1302 indexes.append(index) 1303 elif create_token.token_type == TokenType.VIEW: 1304 if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"): 1305 no_schema_binding = True 1306 1307 shallow = self._match_text_seq("SHALLOW") 1308 1309 if self._match_text_seq("CLONE"): 1310 clone = self._parse_table(schema=True) 1311 when = self._match_texts({"AT", "BEFORE"}) and self._prev.text.upper() 1312 clone_kind = ( 1313 self._match(TokenType.L_PAREN) 1314 and self._match_texts(self.CLONE_KINDS) 1315 and self._prev.text.upper() 1316 ) 1317 clone_expression = self._match(TokenType.FARROW) and self._parse_bitwise() 1318 self._match(TokenType.R_PAREN) 1319 clone = self.expression( 1320 exp.Clone, 1321 this=clone, 1322 when=when, 1323 kind=clone_kind, 1324 shallow=shallow, 1325 expression=clone_expression, 1326 ) 1327 1328 return self.expression( 1329 exp.Create, 1330 comments=comments, 1331 this=this, 1332 kind=create_token.text, 1333 replace=replace, 1334 unique=unique, 1335 expression=expression, 1336 exists=exists, 1337 properties=properties, 1338 indexes=indexes, 1339 no_schema_binding=no_schema_binding, 1340 begin=begin, 1341 clone=clone, 1342 ) 1343 1344 def _parse_property_before(self) -> t.Optional[exp.Expression]: 1345 # only used for teradata currently 1346 self._match(TokenType.COMMA) 1347 1348 kwargs = { 1349 "no": self._match_text_seq("NO"), 1350 "dual": self._match_text_seq("DUAL"), 1351 "before": self._match_text_seq("BEFORE"), 1352 "default": self._match_text_seq("DEFAULT"), 1353 "local": (self._match_text_seq("LOCAL") and "LOCAL") 1354 or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"), 1355 "after": self._match_text_seq("AFTER"), 1356 "minimum": self._match_texts(("MIN", "MINIMUM")), 1357 "maximum": self._match_texts(("MAX", "MAXIMUM")), 1358 } 1359 1360 if self._match_texts(self.PROPERTY_PARSERS): 1361 parser = self.PROPERTY_PARSERS[self._prev.text.upper()] 1362 try: 1363 return parser(self, **{k: v for k, v in kwargs.items() if v}) 1364 except TypeError: 1365 self.raise_error(f"Cannot parse 
property '{self._prev.text}'") 1366 1367 return None 1368 1369 def _parse_property(self) -> t.Optional[exp.Expression]: 1370 if self._match_texts(self.PROPERTY_PARSERS): 1371 return self.PROPERTY_PARSERS[self._prev.text.upper()](self) 1372 1373 if self._match_pair(TokenType.DEFAULT, TokenType.CHARACTER_SET): 1374 return self._parse_character_set(default=True) 1375 1376 if self._match_text_seq("COMPOUND", "SORTKEY"): 1377 return self._parse_sortkey(compound=True) 1378 1379 if self._match_text_seq("SQL", "SECURITY"): 1380 return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER")) 1381 1382 assignment = self._match_pair( 1383 TokenType.VAR, TokenType.EQ, advance=False 1384 ) or self._match_pair(TokenType.STRING, TokenType.EQ, advance=False) 1385 1386 if assignment: 1387 key = self._parse_var_or_string() 1388 self._match(TokenType.EQ) 1389 return self.expression( 1390 exp.Property, 1391 this=key, 1392 value=self._parse_column() or self._parse_var(any_token=True), 1393 ) 1394 1395 return None 1396 1397 def _parse_stored(self) -> exp.FileFormatProperty: 1398 self._match(TokenType.ALIAS) 1399 1400 input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None 1401 output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None 1402 1403 return self.expression( 1404 exp.FileFormatProperty, 1405 this=self.expression( 1406 exp.InputOutputFormat, input_format=input_format, output_format=output_format 1407 ) 1408 if input_format or output_format 1409 else self._parse_var_or_string() or self._parse_number() or self._parse_id_var(), 1410 ) 1411 1412 def _parse_property_assignment(self, exp_class: t.Type[E]) -> E: 1413 self._match(TokenType.EQ) 1414 self._match(TokenType.ALIAS) 1415 return self.expression(exp_class, this=self._parse_field()) 1416 1417 def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]: 1418 properties = [] 1419 while True: 1420 if before: 1421 prop = self._parse_property_before() 1422 else: 1423 prop = self._parse_property() 1424 1425 if not prop: 1426 break 1427 for p in ensure_list(prop): 1428 properties.append(p) 1429 1430 if properties: 1431 return self.expression(exp.Properties, expressions=properties) 1432 1433 return None 1434 1435 def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty: 1436 return self.expression( 1437 exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION") 1438 ) 1439 1440 def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty: 1441 if self._index >= 2: 1442 pre_volatile_token = self._tokens[self._index - 2] 1443 else: 1444 pre_volatile_token = None 1445 1446 if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS: 1447 return exp.VolatileProperty() 1448 1449 return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE")) 1450 1451 def _parse_with_property( 1452 self, 1453 ) -> t.Optional[exp.Expression] | t.List[exp.Expression]: 1454 if self._match(TokenType.L_PAREN, advance=False): 1455 return self._parse_wrapped_csv(self._parse_property) 1456 1457 if self._match_text_seq("JOURNAL"): 1458 return self._parse_withjournaltable() 1459 1460 if self._match_text_seq("DATA"): 1461 return self._parse_withdata(no=False) 1462 elif self._match_text_seq("NO", "DATA"): 1463 return self._parse_withdata(no=True) 1464 1465 if not self._next: 1466 return None 1467 1468 return self._parse_withisolatedloading() 1469 1470 # 
https://dev.mysql.com/doc/refman/8.0/en/create-view.html 1471 def _parse_definer(self) -> t.Optional[exp.DefinerProperty]: 1472 self._match(TokenType.EQ) 1473 1474 user = self._parse_id_var() 1475 self._match(TokenType.PARAMETER) 1476 host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text) 1477 1478 if not user or not host: 1479 return None 1480 1481 return exp.DefinerProperty(this=f"{user}@{host}") 1482 1483 def _parse_withjournaltable(self) -> exp.WithJournalTableProperty: 1484 self._match(TokenType.TABLE) 1485 self._match(TokenType.EQ) 1486 return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts()) 1487 1488 def _parse_log(self, no: bool = False) -> exp.LogProperty: 1489 return self.expression(exp.LogProperty, no=no) 1490 1491 def _parse_journal(self, **kwargs) -> exp.JournalProperty: 1492 return self.expression(exp.JournalProperty, **kwargs) 1493 1494 def _parse_checksum(self) -> exp.ChecksumProperty: 1495 self._match(TokenType.EQ) 1496 1497 on = None 1498 if self._match(TokenType.ON): 1499 on = True 1500 elif self._match_text_seq("OFF"): 1501 on = False 1502 1503 return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT)) 1504 1505 def _parse_cluster(self) -> exp.Cluster: 1506 return self.expression(exp.Cluster, expressions=self._parse_csv(self._parse_ordered)) 1507 1508 def _parse_clustered_by(self) -> exp.ClusteredByProperty: 1509 self._match_text_seq("BY") 1510 1511 self._match_l_paren() 1512 expressions = self._parse_csv(self._parse_column) 1513 self._match_r_paren() 1514 1515 if self._match_text_seq("SORTED", "BY"): 1516 self._match_l_paren() 1517 sorted_by = self._parse_csv(self._parse_ordered) 1518 self._match_r_paren() 1519 else: 1520 sorted_by = None 1521 1522 self._match(TokenType.INTO) 1523 buckets = self._parse_number() 1524 self._match_text_seq("BUCKETS") 1525 1526 return self.expression( 1527 exp.ClusteredByProperty, 1528 expressions=expressions, 1529 sorted_by=sorted_by, 1530 buckets=buckets, 1531 ) 1532 1533 def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]: 1534 if not self._match_text_seq("GRANTS"): 1535 self._retreat(self._index - 1) 1536 return None 1537 1538 return self.expression(exp.CopyGrantsProperty) 1539 1540 def _parse_freespace(self) -> exp.FreespaceProperty: 1541 self._match(TokenType.EQ) 1542 return self.expression( 1543 exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT) 1544 ) 1545 1546 def _parse_mergeblockratio( 1547 self, no: bool = False, default: bool = False 1548 ) -> exp.MergeBlockRatioProperty: 1549 if self._match(TokenType.EQ): 1550 return self.expression( 1551 exp.MergeBlockRatioProperty, 1552 this=self._parse_number(), 1553 percent=self._match(TokenType.PERCENT), 1554 ) 1555 1556 return self.expression(exp.MergeBlockRatioProperty, no=no, default=default) 1557 1558 def _parse_datablocksize( 1559 self, 1560 default: t.Optional[bool] = None, 1561 minimum: t.Optional[bool] = None, 1562 maximum: t.Optional[bool] = None, 1563 ) -> exp.DataBlocksizeProperty: 1564 self._match(TokenType.EQ) 1565 size = self._parse_number() 1566 1567 units = None 1568 if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")): 1569 units = self._prev.text 1570 1571 return self.expression( 1572 exp.DataBlocksizeProperty, 1573 size=size, 1574 units=units, 1575 default=default, 1576 minimum=minimum, 1577 maximum=maximum, 1578 ) 1579 1580 def _parse_blockcompression(self) -> exp.BlockCompressionProperty: 1581 self._match(TokenType.EQ) 1582 
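# BLOCKCOMPRESSION accepts one of ALWAYS | MANUAL | NEVER | DEFAULT,
# optionally followed by AUTOTEMP(...), a Teradata-style table option.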
always = self._match_text_seq("ALWAYS") 1583 manual = self._match_text_seq("MANUAL") 1584 never = self._match_text_seq("NEVER") 1585 default = self._match_text_seq("DEFAULT") 1586 1587 autotemp = None 1588 if self._match_text_seq("AUTOTEMP"): 1589 autotemp = self._parse_schema() 1590 1591 return self.expression( 1592 exp.BlockCompressionProperty, 1593 always=always, 1594 manual=manual, 1595 never=never, 1596 default=default, 1597 autotemp=autotemp, 1598 ) 1599 1600 def _parse_withisolatedloading(self) -> exp.IsolatedLoadingProperty: 1601 no = self._match_text_seq("NO") 1602 concurrent = self._match_text_seq("CONCURRENT") 1603 self._match_text_seq("ISOLATED", "LOADING") 1604 for_all = self._match_text_seq("FOR", "ALL") 1605 for_insert = self._match_text_seq("FOR", "INSERT") 1606 for_none = self._match_text_seq("FOR", "NONE") 1607 return self.expression( 1608 exp.IsolatedLoadingProperty, 1609 no=no, 1610 concurrent=concurrent, 1611 for_all=for_all, 1612 for_insert=for_insert, 1613 for_none=for_none, 1614 ) 1615 1616 def _parse_locking(self) -> exp.LockingProperty: 1617 if self._match(TokenType.TABLE): 1618 kind = "TABLE" 1619 elif self._match(TokenType.VIEW): 1620 kind = "VIEW" 1621 elif self._match(TokenType.ROW): 1622 kind = "ROW" 1623 elif self._match_text_seq("DATABASE"): 1624 kind = "DATABASE" 1625 else: 1626 kind = None 1627 1628 if kind in ("DATABASE", "TABLE", "VIEW"): 1629 this = self._parse_table_parts() 1630 else: 1631 this = None 1632 1633 if self._match(TokenType.FOR): 1634 for_or_in = "FOR" 1635 elif self._match(TokenType.IN): 1636 for_or_in = "IN" 1637 else: 1638 for_or_in = None 1639 1640 if self._match_text_seq("ACCESS"): 1641 lock_type = "ACCESS" 1642 elif self._match_texts(("EXCL", "EXCLUSIVE")): 1643 lock_type = "EXCLUSIVE" 1644 elif self._match_text_seq("SHARE"): 1645 lock_type = "SHARE" 1646 elif self._match_text_seq("READ"): 1647 lock_type = "READ" 1648 elif self._match_text_seq("WRITE"): 1649 lock_type = "WRITE" 1650 elif self._match_text_seq("CHECKSUM"): 1651 lock_type = "CHECKSUM" 1652 else: 1653 lock_type = None 1654 1655 override = self._match_text_seq("OVERRIDE") 1656 1657 return self.expression( 1658 exp.LockingProperty, 1659 this=this, 1660 kind=kind, 1661 for_or_in=for_or_in, 1662 lock_type=lock_type, 1663 override=override, 1664 ) 1665 1666 def _parse_partition_by(self) -> t.List[exp.Expression]: 1667 if self._match(TokenType.PARTITION_BY): 1668 return self._parse_csv(self._parse_conjunction) 1669 return [] 1670 1671 def _parse_partitioned_by(self) -> exp.PartitionedByProperty: 1672 self._match(TokenType.EQ) 1673 return self.expression( 1674 exp.PartitionedByProperty, 1675 this=self._parse_schema() or self._parse_bracket(self._parse_field()), 1676 ) 1677 1678 def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty: 1679 if self._match_text_seq("AND", "STATISTICS"): 1680 statistics = True 1681 elif self._match_text_seq("AND", "NO", "STATISTICS"): 1682 statistics = False 1683 else: 1684 statistics = None 1685 1686 return self.expression(exp.WithDataProperty, no=no, statistics=statistics) 1687 1688 def _parse_no_property(self) -> t.Optional[exp.NoPrimaryIndexProperty]: 1689 if self._match_text_seq("PRIMARY", "INDEX"): 1690 return exp.NoPrimaryIndexProperty() 1691 return None 1692 1693 def _parse_on_property(self) -> t.Optional[exp.Expression]: 1694 if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"): 1695 return exp.OnCommitProperty() 1696 if self._match_text_seq("COMMIT", "DELETE", "ROWS"): 1697 return exp.OnCommitProperty(delete=True) 1698 return 
self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var())) 1699 1700 def _parse_distkey(self) -> exp.DistKeyProperty: 1701 return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var)) 1702 1703 def _parse_create_like(self) -> t.Optional[exp.LikeProperty]: 1704 table = self._parse_table(schema=True) 1705 1706 options = [] 1707 while self._match_texts(("INCLUDING", "EXCLUDING")): 1708 this = self._prev.text.upper() 1709 1710 id_var = self._parse_id_var() 1711 if not id_var: 1712 return None 1713 1714 options.append( 1715 self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper())) 1716 ) 1717 1718 return self.expression(exp.LikeProperty, this=table, expressions=options) 1719 1720 def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty: 1721 return self.expression( 1722 exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound 1723 ) 1724 1725 def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty: 1726 self._match(TokenType.EQ) 1727 return self.expression( 1728 exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default 1729 ) 1730 1731 def _parse_returns(self) -> exp.ReturnsProperty: 1732 value: t.Optional[exp.Expression] 1733 is_table = self._match(TokenType.TABLE) 1734 1735 if is_table: 1736 if self._match(TokenType.LT): 1737 value = self.expression( 1738 exp.Schema, 1739 this="TABLE", 1740 expressions=self._parse_csv(self._parse_struct_types), 1741 ) 1742 if not self._match(TokenType.GT): 1743 self.raise_error("Expecting >") 1744 else: 1745 value = self._parse_schema(exp.var("TABLE")) 1746 else: 1747 value = self._parse_types() 1748 1749 return self.expression(exp.ReturnsProperty, this=value, is_table=is_table) 1750 1751 def _parse_describe(self) -> exp.Describe: 1752 kind = self._match_set(self.CREATABLES) and self._prev.text 1753 this = self._parse_table() 1754 return self.expression(exp.Describe, this=this, kind=kind) 1755 1756 def _parse_insert(self) -> exp.Insert: 1757 comments = ensure_list(self._prev_comments) 1758 overwrite = self._match(TokenType.OVERWRITE) 1759 ignore = self._match(TokenType.IGNORE) 1760 local = self._match_text_seq("LOCAL") 1761 alternative = None 1762 1763 if self._match_text_seq("DIRECTORY"): 1764 this: t.Optional[exp.Expression] = self.expression( 1765 exp.Directory, 1766 this=self._parse_var_or_string(), 1767 local=local, 1768 row_format=self._parse_row_format(match_row=True), 1769 ) 1770 else: 1771 if self._match(TokenType.OR): 1772 alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text 1773 1774 self._match(TokenType.INTO) 1775 comments += ensure_list(self._prev_comments) 1776 self._match(TokenType.TABLE) 1777 this = self._parse_table(schema=True) 1778 1779 returning = self._parse_returning() 1780 1781 return self.expression( 1782 exp.Insert, 1783 comments=comments, 1784 this=this, 1785 by_name=self._match_text_seq("BY", "NAME"), 1786 exists=self._parse_exists(), 1787 partition=self._parse_partition(), 1788 where=self._match_pair(TokenType.REPLACE, TokenType.WHERE) 1789 and self._parse_conjunction(), 1790 expression=self._parse_ddl_select(), 1791 conflict=self._parse_on_conflict(), 1792 returning=returning or self._parse_returning(), 1793 overwrite=overwrite, 1794 alternative=alternative, 1795 ignore=ignore, 1796 ) 1797 1798 def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]: 1799 conflict = self._match_text_seq("ON", "CONFLICT") 1800 duplicate = self._match_text_seq("ON", "DUPLICATE", 
"KEY") 1801 1802 if not conflict and not duplicate: 1803 return None 1804 1805 nothing = None 1806 expressions = None 1807 key = None 1808 constraint = None 1809 1810 if conflict: 1811 if self._match_text_seq("ON", "CONSTRAINT"): 1812 constraint = self._parse_id_var() 1813 else: 1814 key = self._parse_csv(self._parse_value) 1815 1816 self._match_text_seq("DO") 1817 if self._match_text_seq("NOTHING"): 1818 nothing = True 1819 else: 1820 self._match(TokenType.UPDATE) 1821 self._match(TokenType.SET) 1822 expressions = self._parse_csv(self._parse_equality) 1823 1824 return self.expression( 1825 exp.OnConflict, 1826 duplicate=duplicate, 1827 expressions=expressions, 1828 nothing=nothing, 1829 key=key, 1830 constraint=constraint, 1831 ) 1832 1833 def _parse_returning(self) -> t.Optional[exp.Returning]: 1834 if not self._match(TokenType.RETURNING): 1835 return None 1836 return self.expression( 1837 exp.Returning, 1838 expressions=self._parse_csv(self._parse_expression), 1839 into=self._match(TokenType.INTO) and self._parse_table_part(), 1840 ) 1841 1842 def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 1843 if not self._match(TokenType.FORMAT): 1844 return None 1845 return self._parse_row_format() 1846 1847 def _parse_row_format( 1848 self, match_row: bool = False 1849 ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 1850 if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT): 1851 return None 1852 1853 if self._match_text_seq("SERDE"): 1854 this = self._parse_string() 1855 1856 serde_properties = None 1857 if self._match(TokenType.SERDE_PROPERTIES): 1858 serde_properties = self.expression( 1859 exp.SerdeProperties, expressions=self._parse_wrapped_csv(self._parse_property) 1860 ) 1861 1862 return self.expression( 1863 exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties 1864 ) 1865 1866 self._match_text_seq("DELIMITED") 1867 1868 kwargs = {} 1869 1870 if self._match_text_seq("FIELDS", "TERMINATED", "BY"): 1871 kwargs["fields"] = self._parse_string() 1872 if self._match_text_seq("ESCAPED", "BY"): 1873 kwargs["escaped"] = self._parse_string() 1874 if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"): 1875 kwargs["collection_items"] = self._parse_string() 1876 if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"): 1877 kwargs["map_keys"] = self._parse_string() 1878 if self._match_text_seq("LINES", "TERMINATED", "BY"): 1879 kwargs["lines"] = self._parse_string() 1880 if self._match_text_seq("NULL", "DEFINED", "AS"): 1881 kwargs["null"] = self._parse_string() 1882 1883 return self.expression(exp.RowFormatDelimitedProperty, **kwargs) # type: ignore 1884 1885 def _parse_load(self) -> exp.LoadData | exp.Command: 1886 if self._match_text_seq("DATA"): 1887 local = self._match_text_seq("LOCAL") 1888 self._match_text_seq("INPATH") 1889 inpath = self._parse_string() 1890 overwrite = self._match(TokenType.OVERWRITE) 1891 self._match_pair(TokenType.INTO, TokenType.TABLE) 1892 1893 return self.expression( 1894 exp.LoadData, 1895 this=self._parse_table(schema=True), 1896 local=local, 1897 overwrite=overwrite, 1898 inpath=inpath, 1899 partition=self._parse_partition(), 1900 input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(), 1901 serde=self._match_text_seq("SERDE") and self._parse_string(), 1902 ) 1903 return self._parse_as_command(self._prev) 1904 1905 def _parse_delete(self) -> exp.Delete: 1906 # This handles MySQL's "Multiple-Table Syntax" 1907 # 
https://dev.mysql.com/doc/refman/8.0/en/delete.html 1908 tables = None 1909 comments = self._prev_comments 1910 if not self._match(TokenType.FROM, advance=False): 1911 tables = self._parse_csv(self._parse_table) or None 1912 1913 returning = self._parse_returning() 1914 1915 return self.expression( 1916 exp.Delete, 1917 comments=comments, 1918 tables=tables, 1919 this=self._match(TokenType.FROM) and self._parse_table(joins=True), 1920 using=self._match(TokenType.USING) and self._parse_table(joins=True), 1921 where=self._parse_where(), 1922 returning=returning or self._parse_returning(), 1923 limit=self._parse_limit(), 1924 ) 1925 1926 def _parse_update(self) -> exp.Update: 1927 comments = self._prev_comments 1928 this = self._parse_table(alias_tokens=self.UPDATE_ALIAS_TOKENS) 1929 expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality) 1930 returning = self._parse_returning() 1931 return self.expression( 1932 exp.Update, 1933 comments=comments, 1934 **{ # type: ignore 1935 "this": this, 1936 "expressions": expressions, 1937 "from": self._parse_from(joins=True), 1938 "where": self._parse_where(), 1939 "returning": returning or self._parse_returning(), 1940 "limit": self._parse_limit(), 1941 }, 1942 ) 1943 1944 def _parse_uncache(self) -> exp.Uncache: 1945 if not self._match(TokenType.TABLE): 1946 self.raise_error("Expecting TABLE after UNCACHE") 1947 1948 return self.expression( 1949 exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True) 1950 ) 1951 1952 def _parse_cache(self) -> exp.Cache: 1953 lazy = self._match_text_seq("LAZY") 1954 self._match(TokenType.TABLE) 1955 table = self._parse_table(schema=True) 1956 1957 options = [] 1958 if self._match_text_seq("OPTIONS"): 1959 self._match_l_paren() 1960 k = self._parse_string() 1961 self._match(TokenType.EQ) 1962 v = self._parse_string() 1963 options = [k, v] 1964 self._match_r_paren() 1965 1966 self._match(TokenType.ALIAS) 1967 return self.expression( 1968 exp.Cache, 1969 this=table, 1970 lazy=lazy, 1971 options=options, 1972 expression=self._parse_select(nested=True), 1973 ) 1974 1975 def _parse_partition(self) -> t.Optional[exp.Partition]: 1976 if not self._match(TokenType.PARTITION): 1977 return None 1978 1979 return self.expression( 1980 exp.Partition, expressions=self._parse_wrapped_csv(self._parse_conjunction) 1981 ) 1982 1983 def _parse_value(self) -> exp.Tuple: 1984 if self._match(TokenType.L_PAREN): 1985 expressions = self._parse_csv(self._parse_conjunction) 1986 self._match_r_paren() 1987 return self.expression(exp.Tuple, expressions=expressions) 1988 1989 # In presto we can have VALUES 1, 2 which results in 1 column & 2 rows. 
1990 # https://prestodb.io/docs/current/sql/values.html 1991 return self.expression(exp.Tuple, expressions=[self._parse_conjunction()]) 1992 1993 def _parse_projections(self) -> t.List[exp.Expression]: 1994 return self._parse_expressions() 1995 1996 def _parse_select( 1997 self, nested: bool = False, table: bool = False, parse_subquery_alias: bool = True 1998 ) -> t.Optional[exp.Expression]: 1999 cte = self._parse_with() 2000 2001 if cte: 2002 this = self._parse_statement() 2003 2004 if not this: 2005 self.raise_error("Failed to parse any statement following CTE") 2006 return cte 2007 2008 if "with" in this.arg_types: 2009 this.set("with", cte) 2010 else: 2011 self.raise_error(f"{this.key} does not support CTE") 2012 this = cte 2013 2014 return this 2015 2016 # duckdb supports leading with FROM x 2017 from_ = self._parse_from() if self._match(TokenType.FROM, advance=False) else None 2018 2019 if self._match(TokenType.SELECT): 2020 comments = self._prev_comments 2021 2022 hint = self._parse_hint() 2023 all_ = self._match(TokenType.ALL) 2024 distinct = self._match_set(self.DISTINCT_TOKENS) 2025 2026 kind = ( 2027 self._match(TokenType.ALIAS) 2028 and self._match_texts(("STRUCT", "VALUE")) 2029 and self._prev.text 2030 ) 2031 2032 if distinct: 2033 distinct = self.expression( 2034 exp.Distinct, 2035 on=self._parse_value() if self._match(TokenType.ON) else None, 2036 ) 2037 2038 if all_ and distinct: 2039 self.raise_error("Cannot specify both ALL and DISTINCT after SELECT") 2040 2041 limit = self._parse_limit(top=True) 2042 projections = self._parse_projections() 2043 2044 this = self.expression( 2045 exp.Select, 2046 kind=kind, 2047 hint=hint, 2048 distinct=distinct, 2049 expressions=projections, 2050 limit=limit, 2051 ) 2052 this.comments = comments 2053 2054 into = self._parse_into() 2055 if into: 2056 this.set("into", into) 2057 2058 if not from_: 2059 from_ = self._parse_from() 2060 2061 if from_: 2062 this.set("from", from_) 2063 2064 this = self._parse_query_modifiers(this) 2065 elif (table or nested) and self._match(TokenType.L_PAREN): 2066 if self._match(TokenType.PIVOT): 2067 this = self._parse_simplified_pivot() 2068 elif self._match(TokenType.FROM): 2069 this = exp.select("*").from_( 2070 t.cast(exp.From, self._parse_from(skip_from_token=True)) 2071 ) 2072 else: 2073 this = self._parse_table() if table else self._parse_select(nested=True) 2074 this = self._parse_set_operations(self._parse_query_modifiers(this)) 2075 2076 self._match_r_paren() 2077 2078 # We return early here so that the UNION isn't attached to the subquery by the 2079 # following call to _parse_set_operations, but instead becomes the parent node 2080 return self._parse_subquery(this, parse_alias=parse_subquery_alias) 2081 elif self._match(TokenType.VALUES): 2082 this = self.expression( 2083 exp.Values, 2084 expressions=self._parse_csv(self._parse_value), 2085 alias=self._parse_table_alias(), 2086 ) 2087 elif from_: 2088 this = exp.select("*").from_(from_.this, copy=False) 2089 else: 2090 this = None 2091 2092 return self._parse_set_operations(this) 2093 2094 def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]: 2095 if not skip_with_token and not self._match(TokenType.WITH): 2096 return None 2097 2098 comments = self._prev_comments 2099 recursive = self._match(TokenType.RECURSIVE) 2100 2101 expressions = [] 2102 while True: 2103 expressions.append(self._parse_cte()) 2104 2105 if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH): 2106 break 2107 else: 2108 
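# A separator (comma or WITH) was consumed, so also swallow an optional
# repeated WITH keyword between CTEs, e.g. (illustrative):
#
#     WITH a AS (SELECT 1), WITH b AS (SELECT 2) SELECT * FROM b
#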
self._match(TokenType.WITH) 2109 2110 return self.expression( 2111 exp.With, comments=comments, expressions=expressions, recursive=recursive 2112 ) 2113 2114 def _parse_cte(self) -> exp.CTE: 2115 alias = self._parse_table_alias() 2116 if not alias or not alias.this: 2117 self.raise_error("Expected CTE to have alias") 2118 2119 self._match(TokenType.ALIAS) 2120 return self.expression( 2121 exp.CTE, this=self._parse_wrapped(self._parse_statement), alias=alias 2122 ) 2123 2124 def _parse_table_alias( 2125 self, alias_tokens: t.Optional[t.Collection[TokenType]] = None 2126 ) -> t.Optional[exp.TableAlias]: 2127 any_token = self._match(TokenType.ALIAS) 2128 alias = ( 2129 self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 2130 or self._parse_string_as_identifier() 2131 ) 2132 2133 index = self._index 2134 if self._match(TokenType.L_PAREN): 2135 columns = self._parse_csv(self._parse_function_parameter) 2136 self._match_r_paren() if columns else self._retreat(index) 2137 else: 2138 columns = None 2139 2140 if not alias and not columns: 2141 return None 2142 2143 return self.expression(exp.TableAlias, this=alias, columns=columns) 2144 2145 def _parse_subquery( 2146 self, this: t.Optional[exp.Expression], parse_alias: bool = True 2147 ) -> t.Optional[exp.Subquery]: 2148 if not this: 2149 return None 2150 2151 return self.expression( 2152 exp.Subquery, 2153 this=this, 2154 pivots=self._parse_pivots(), 2155 alias=self._parse_table_alias() if parse_alias else None, 2156 ) 2157 2158 def _parse_query_modifiers( 2159 self, this: t.Optional[exp.Expression] 2160 ) -> t.Optional[exp.Expression]: 2161 if isinstance(this, self.MODIFIABLES): 2162 for join in iter(self._parse_join, None): 2163 this.append("joins", join) 2164 for lateral in iter(self._parse_lateral, None): 2165 this.append("laterals", lateral) 2166 2167 while True: 2168 if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False): 2169 parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type] 2170 key, expression = parser(self) 2171 2172 if expression: 2173 this.set(key, expression) 2174 if key == "limit": 2175 offset = expression.args.pop("offset", None) 2176 if offset: 2177 this.set("offset", exp.Offset(expression=offset)) 2178 continue 2179 break 2180 return this 2181 2182 def _parse_hint(self) -> t.Optional[exp.Hint]: 2183 if self._match(TokenType.HINT): 2184 hints = [] 2185 for hint in iter(lambda: self._parse_csv(self._parse_function), []): 2186 hints.extend(hint) 2187 2188 if not self._match_pair(TokenType.STAR, TokenType.SLASH): 2189 self.raise_error("Expected */ after HINT") 2190 2191 return self.expression(exp.Hint, expressions=hints) 2192 2193 return None 2194 2195 def _parse_into(self) -> t.Optional[exp.Into]: 2196 if not self._match(TokenType.INTO): 2197 return None 2198 2199 temp = self._match(TokenType.TEMPORARY) 2200 unlogged = self._match_text_seq("UNLOGGED") 2201 self._match(TokenType.TABLE) 2202 2203 return self.expression( 2204 exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged 2205 ) 2206 2207 def _parse_from( 2208 self, joins: bool = False, skip_from_token: bool = False 2209 ) -> t.Optional[exp.From]: 2210 if not skip_from_token and not self._match(TokenType.FROM): 2211 return None 2212 2213 return self.expression( 2214 exp.From, comments=self._prev_comments, this=self._parse_table(joins=joins) 2215 ) 2216 2217 def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]: 2218 if not self._match(TokenType.MATCH_RECOGNIZE): 2219 return None 2220 2221 
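# The clause parsed below follows the general shape used by Snowflake and
# Oracle; roughly (an illustrative sketch, not every optional part shown):
#
#     MATCH_RECOGNIZE (
#         PARTITION BY a ORDER BY b
#         MEASURES LAST(y.v) AS final_v
#         ALL ROWS PER MATCH
#         AFTER MATCH SKIP PAST LAST ROW
#         PATTERN (x y+)
#         DEFINE y AS y.v > x.v
#     )
#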
self._match_l_paren() 2222 2223 partition = self._parse_partition_by() 2224 order = self._parse_order() 2225 measures = self._parse_expressions() if self._match_text_seq("MEASURES") else None 2226 2227 if self._match_text_seq("ONE", "ROW", "PER", "MATCH"): 2228 rows = exp.var("ONE ROW PER MATCH") 2229 elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"): 2230 text = "ALL ROWS PER MATCH" 2231 if self._match_text_seq("SHOW", "EMPTY", "MATCHES"): 2232 text += " SHOW EMPTY MATCHES" 2233 elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"): 2234 text += " OMIT EMPTY MATCHES" 2235 elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"): 2236 text += " WITH UNMATCHED ROWS" 2237 rows = exp.var(text) 2238 else: 2239 rows = None 2240 2241 if self._match_text_seq("AFTER", "MATCH", "SKIP"): 2242 text = "AFTER MATCH SKIP" 2243 if self._match_text_seq("PAST", "LAST", "ROW"): 2244 text += " PAST LAST ROW" 2245 elif self._match_text_seq("TO", "NEXT", "ROW"): 2246 text += " TO NEXT ROW" 2247 elif self._match_text_seq("TO", "FIRST"): 2248 text += f" TO FIRST {self._advance_any().text}" # type: ignore 2249 elif self._match_text_seq("TO", "LAST"): 2250 text += f" TO LAST {self._advance_any().text}" # type: ignore 2251 after = exp.var(text) 2252 else: 2253 after = None 2254 2255 if self._match_text_seq("PATTERN"): 2256 self._match_l_paren() 2257 2258 if not self._curr: 2259 self.raise_error("Expecting )", self._curr) 2260 2261 paren = 1 2262 start = self._curr 2263 2264 while self._curr and paren > 0: 2265 if self._curr.token_type == TokenType.L_PAREN: 2266 paren += 1 2267 if self._curr.token_type == TokenType.R_PAREN: 2268 paren -= 1 2269 2270 end = self._prev 2271 self._advance() 2272 2273 if paren > 0: 2274 self.raise_error("Expecting )", self._curr) 2275 2276 pattern = exp.var(self._find_sql(start, end)) 2277 else: 2278 pattern = None 2279 2280 define = ( 2281 self._parse_csv( 2282 lambda: self.expression( 2283 exp.Alias, 2284 alias=self._parse_id_var(any_token=True), 2285 this=self._match(TokenType.ALIAS) and self._parse_conjunction(), 2286 ) 2287 ) 2288 if self._match_text_seq("DEFINE") 2289 else None 2290 ) 2291 2292 self._match_r_paren() 2293 2294 return self.expression( 2295 exp.MatchRecognize, 2296 partition_by=partition, 2297 order=order, 2298 measures=measures, 2299 rows=rows, 2300 after=after, 2301 pattern=pattern, 2302 define=define, 2303 alias=self._parse_table_alias(), 2304 ) 2305 2306 def _parse_lateral(self) -> t.Optional[exp.Lateral]: 2307 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY) 2308 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY) 2309 2310 if outer_apply or cross_apply: 2311 this = self._parse_select(table=True) 2312 view = None 2313 outer = not cross_apply 2314 elif self._match(TokenType.LATERAL): 2315 this = self._parse_select(table=True) 2316 view = self._match(TokenType.VIEW) 2317 outer = self._match(TokenType.OUTER) 2318 else: 2319 return None 2320 2321 if not this: 2322 this = ( 2323 self._parse_unnest() 2324 or self._parse_function() 2325 or self._parse_id_var(any_token=False) 2326 ) 2327 2328 while self._match(TokenType.DOT): 2329 this = exp.Dot( 2330 this=this, 2331 expression=self._parse_function() or self._parse_id_var(any_token=False), 2332 ) 2333 2334 if view: 2335 table = self._parse_id_var(any_token=False) 2336 columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else [] 2337 table_alias: t.Optional[exp.TableAlias] = self.expression( 2338 exp.TableAlias, this=table, columns=columns 2339 ) 2340 elif
isinstance(this, exp.Subquery) and this.alias: 2341 # Ensures parity between the Subquery's and the Lateral's "alias" args 2342 table_alias = this.args["alias"].copy() 2343 else: 2344 table_alias = self._parse_table_alias() 2345 2346 return self.expression(exp.Lateral, this=this, view=view, outer=outer, alias=table_alias) 2347 2348 def _parse_join_parts( 2349 self, 2350 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 2351 return ( 2352 self._match_set(self.JOIN_METHODS) and self._prev, 2353 self._match_set(self.JOIN_SIDES) and self._prev, 2354 self._match_set(self.JOIN_KINDS) and self._prev, 2355 ) 2356 2357 def _parse_join( 2358 self, skip_join_token: bool = False, parse_bracket: bool = False 2359 ) -> t.Optional[exp.Join]: 2360 if self._match(TokenType.COMMA): 2361 return self.expression(exp.Join, this=self._parse_table()) 2362 2363 index = self._index 2364 method, side, kind = self._parse_join_parts() 2365 hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None 2366 join = self._match(TokenType.JOIN) 2367 2368 if not skip_join_token and not join: 2369 self._retreat(index) 2370 kind = None 2371 method = None 2372 side = None 2373 2374 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False) 2375 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False) 2376 2377 if not skip_join_token and not join and not outer_apply and not cross_apply: 2378 return None 2379 2380 if outer_apply: 2381 side = Token(TokenType.LEFT, "LEFT") 2382 2383 kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)} 2384 2385 if method: 2386 kwargs["method"] = method.text 2387 if side: 2388 kwargs["side"] = side.text 2389 if kind: 2390 kwargs["kind"] = kind.text 2391 if hint: 2392 kwargs["hint"] = hint 2393 2394 if self._match(TokenType.ON): 2395 kwargs["on"] = self._parse_conjunction() 2396 elif self._match(TokenType.USING): 2397 kwargs["using"] = self._parse_wrapped_id_vars() 2398 elif not (kind and kind.token_type == TokenType.CROSS): 2399 index = self._index 2400 joins = self._parse_joins() 2401 2402 if joins and self._match(TokenType.ON): 2403 kwargs["on"] = self._parse_conjunction() 2404 elif joins and self._match(TokenType.USING): 2405 kwargs["using"] = self._parse_wrapped_id_vars() 2406 else: 2407 joins = None 2408 self._retreat(index) 2409 2410 kwargs["this"].set("joins", joins) 2411 2412 comments = [c for token in (method, side, kind) if token for c in token.comments] 2413 return self.expression(exp.Join, comments=comments, **kwargs) 2414 2415 def _parse_index( 2416 self, 2417 index: t.Optional[exp.Expression] = None, 2418 ) -> t.Optional[exp.Index]: 2419 if index: 2420 unique = None 2421 primary = None 2422 amp = None 2423 2424 self._match(TokenType.ON) 2425 self._match(TokenType.TABLE) # hive 2426 table = self._parse_table_parts(schema=True) 2427 else: 2428 unique = self._match(TokenType.UNIQUE) 2429 primary = self._match_text_seq("PRIMARY") 2430 amp = self._match_text_seq("AMP") 2431 2432 if not self._match(TokenType.INDEX): 2433 return None 2434 2435 index = self._parse_id_var() 2436 table = None 2437 2438 using = self._parse_field() if self._match(TokenType.USING) else None 2439 2440 if self._match(TokenType.L_PAREN, advance=False): 2441 columns = self._parse_wrapped_csv(self._parse_ordered) 2442 else: 2443 columns = None 2444 2445 return self.expression( 2446 exp.Index, 2447 this=index, 2448 table=table, 2449 using=using, 2450 columns=columns, 2451 unique=unique, 2452 primary=primary, 2453 amp=amp, 2454 
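# (Illustrative) the surrounding expression covers forms such as
#
#     CREATE UNIQUE INDEX idx ON tbl (a, b)
#
# where the wrapped, ordered column list above becomes `columns`, an
# optional USING clause becomes `using`, and a dialect-specific trailing
# PARTITION BY is parsed just below.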
partition_by=self._parse_partition_by(), 2455 ) 2456 2457 def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]: 2458 hints: t.List[exp.Expression] = [] 2459 if self._match_pair(TokenType.WITH, TokenType.L_PAREN): 2460 # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16 2461 hints.append( 2462 self.expression( 2463 exp.WithTableHint, 2464 expressions=self._parse_csv( 2465 lambda: self._parse_function() or self._parse_var(any_token=True) 2466 ), 2467 ) 2468 ) 2469 self._match_r_paren() 2470 else: 2471 # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html 2472 while self._match_set(self.TABLE_INDEX_HINT_TOKENS): 2473 hint = exp.IndexTableHint(this=self._prev.text.upper()) 2474 2475 self._match_texts({"INDEX", "KEY"}) 2476 if self._match(TokenType.FOR): 2477 hint.set("target", self._advance_any() and self._prev.text.upper()) 2478 2479 hint.set("expressions", self._parse_wrapped_id_vars()) 2480 hints.append(hint) 2481 2482 return hints or None 2483 2484 def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]: 2485 return ( 2486 (not schema and self._parse_function(optional_parens=False)) 2487 or self._parse_id_var(any_token=False) 2488 or self._parse_string_as_identifier() 2489 or self._parse_placeholder() 2490 ) 2491 2492 def _parse_table_parts(self, schema: bool = False) -> exp.Table: 2493 catalog = None 2494 db = None 2495 table = self._parse_table_part(schema=schema) 2496 2497 while self._match(TokenType.DOT): 2498 if catalog: 2499 # This allows nesting the table in arbitrarily many dot expressions if needed 2500 table = self.expression( 2501 exp.Dot, this=table, expression=self._parse_table_part(schema=schema) 2502 ) 2503 else: 2504 catalog = db 2505 db = table 2506 table = self._parse_table_part(schema=schema) 2507 2508 if not table: 2509 self.raise_error(f"Expected table name but got {self._curr}") 2510 2511 return self.expression( 2512 exp.Table, this=table, db=db, catalog=catalog, pivots=self._parse_pivots() 2513 ) 2514 2515 def _parse_table( 2516 self, 2517 schema: bool = False, 2518 joins: bool = False, 2519 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 2520 parse_bracket: bool = False, 2521 ) -> t.Optional[exp.Expression]: 2522 lateral = self._parse_lateral() 2523 if lateral: 2524 return lateral 2525 2526 unnest = self._parse_unnest() 2527 if unnest: 2528 return unnest 2529 2530 values = self._parse_derived_table_values() 2531 if values: 2532 return values 2533 2534 subquery = self._parse_select(table=True) 2535 if subquery: 2536 if not subquery.args.get("pivots"): 2537 subquery.set("pivots", self._parse_pivots()) 2538 return subquery 2539 2540 bracket = parse_bracket and self._parse_bracket(None) 2541 bracket = self.expression(exp.Table, this=bracket) if bracket else None 2542 this: exp.Expression = bracket or self._parse_table_parts(schema=schema) 2543 2544 if schema: 2545 return self._parse_schema(this=this) 2546 2547 if self.ALIAS_POST_TABLESAMPLE: 2548 table_sample = self._parse_table_sample() 2549 2550 alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 2551 if alias: 2552 this.set("alias", alias) 2553 2554 if not this.args.get("pivots"): 2555 this.set("pivots", self._parse_pivots()) 2556 2557 this.set("hints", self._parse_table_hints()) 2558 2559 if not self.ALIAS_POST_TABLESAMPLE: 2560 table_sample = self._parse_table_sample() 2561 2562 if table_sample: 2563 table_sample.set("this", this) 2564 this = table_sample 2565 2566 if joins: 2567 
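# (Illustrative, assuming the public sqlglot entry points) dotted names
# parsed via _parse_table_parts above land in the Table's args:
#
#     >>> import sqlglot
#     >>> from sqlglot import exp
#     >>> tbl = sqlglot.parse_one("SELECT * FROM c.d.t").find(exp.Table)
#     >>> (tbl.catalog, tbl.db, tbl.name)   # expected, roughly: ('c', 'd', 't')
#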
for join in iter(self._parse_join, None): 2568 this.append("joins", join) 2569 2570 return this 2571 2572 def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]: 2573 if not self._match(TokenType.UNNEST): 2574 return None 2575 2576 expressions = self._parse_wrapped_csv(self._parse_type) 2577 ordinality = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 2578 2579 alias = self._parse_table_alias() if with_alias else None 2580 2581 if alias and self.UNNEST_COLUMN_ONLY: 2582 if alias.args.get("columns"): 2583 self.raise_error("Unexpected extra column alias in unnest.") 2584 2585 alias.set("columns", [alias.this]) 2586 alias.set("this", None) 2587 2588 offset = None 2589 if self._match_pair(TokenType.WITH, TokenType.OFFSET): 2590 self._match(TokenType.ALIAS) 2591 offset = self._parse_id_var() or exp.to_identifier("offset") 2592 2593 return self.expression( 2594 exp.Unnest, expressions=expressions, ordinality=ordinality, alias=alias, offset=offset 2595 ) 2596 2597 def _parse_derived_table_values(self) -> t.Optional[exp.Values]: 2598 is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES) 2599 if not is_derived and not self._match(TokenType.VALUES): 2600 return None 2601 2602 expressions = self._parse_csv(self._parse_value) 2603 alias = self._parse_table_alias() 2604 2605 if is_derived: 2606 self._match_r_paren() 2607 2608 return self.expression( 2609 exp.Values, expressions=expressions, alias=alias or self._parse_table_alias() 2610 ) 2611 2612 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 2613 if not self._match(TokenType.TABLE_SAMPLE) and not ( 2614 as_modifier and self._match_text_seq("USING", "SAMPLE") 2615 ): 2616 return None 2617 2618 bucket_numerator = None 2619 bucket_denominator = None 2620 bucket_field = None 2621 percent = None 2622 rows = None 2623 size = None 2624 seed = None 2625 2626 kind = ( 2627 self._prev.text if self._prev.token_type == TokenType.TABLE_SAMPLE else "USING SAMPLE" 2628 ) 2629 method = self._parse_var(tokens=(TokenType.ROW,)) 2630 2631 self._match(TokenType.L_PAREN) 2632 2633 num = self._parse_number() 2634 2635 if self._match_text_seq("BUCKET"): 2636 bucket_numerator = self._parse_number() 2637 self._match_text_seq("OUT", "OF") 2638 bucket_denominator = self._parse_number() 2639 self._match(TokenType.ON) 2640 bucket_field = self._parse_field() 2641 elif self._match_set((TokenType.PERCENT, TokenType.MOD)): 2642 percent = num 2643 elif self._match(TokenType.ROWS): 2644 rows = num 2645 else: 2646 size = num 2647 2648 self._match(TokenType.R_PAREN) 2649 2650 if self._match(TokenType.L_PAREN): 2651 method = self._parse_var() 2652 seed = self._match(TokenType.COMMA) and self._parse_number() 2653 self._match_r_paren() 2654 elif self._match_texts(("SEED", "REPEATABLE")): 2655 seed = self._parse_wrapped(self._parse_number) 2656 2657 return self.expression( 2658 exp.TableSample, 2659 method=method, 2660 bucket_numerator=bucket_numerator, 2661 bucket_denominator=bucket_denominator, 2662 bucket_field=bucket_field, 2663 percent=percent, 2664 rows=rows, 2665 size=size, 2666 seed=seed, 2667 kind=kind, 2668 ) 2669 2670 def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]: 2671 return list(iter(self._parse_pivot, None)) or None 2672 2673 def _parse_joins(self) -> t.Optional[t.List[exp.Join]]: 2674 return list(iter(self._parse_join, None)) or None 2675 2676 # https://duckdb.org/docs/sql/statements/pivot 2677 def _parse_simplified_pivot(self) -> exp.Pivot: 2678 def _parse_on()
-> t.Optional[exp.Expression]: 2679 this = self._parse_bitwise() 2680 return self._parse_in(this) if self._match(TokenType.IN) else this 2681 2682 this = self._parse_table() 2683 expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on) 2684 using = self._match(TokenType.USING) and self._parse_csv( 2685 lambda: self._parse_alias(self._parse_function()) 2686 ) 2687 group = self._parse_group() 2688 return self.expression( 2689 exp.Pivot, this=this, expressions=expressions, using=using, group=group 2690 ) 2691 2692 def _parse_pivot(self) -> t.Optional[exp.Pivot]: 2693 index = self._index 2694 include_nulls = None 2695 2696 if self._match(TokenType.PIVOT): 2697 unpivot = False 2698 elif self._match(TokenType.UNPIVOT): 2699 unpivot = True 2700 2701 # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax 2702 if self._match_text_seq("INCLUDE", "NULLS"): 2703 include_nulls = True 2704 elif self._match_text_seq("EXCLUDE", "NULLS"): 2705 include_nulls = False 2706 else: 2707 return None 2708 2709 expressions = [] 2710 field = None 2711 2712 if not self._match(TokenType.L_PAREN): 2713 self._retreat(index) 2714 return None 2715 2716 if unpivot: 2717 expressions = self._parse_csv(self._parse_column) 2718 else: 2719 expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function())) 2720 2721 if not expressions: 2722 self.raise_error("Failed to parse PIVOT's aggregation list") 2723 2724 if not self._match(TokenType.FOR): 2725 self.raise_error("Expecting FOR") 2726 2727 value = self._parse_column() 2728 2729 if not self._match(TokenType.IN): 2730 self.raise_error("Expecting IN") 2731 2732 field = self._parse_in(value, alias=True) 2733 2734 self._match_r_paren() 2735 2736 pivot = self.expression( 2737 exp.Pivot, 2738 expressions=expressions, 2739 field=field, 2740 unpivot=unpivot, 2741 include_nulls=include_nulls, 2742 ) 2743 2744 if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False): 2745 pivot.set("alias", self._parse_table_alias()) 2746 2747 if not unpivot: 2748 names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions)) 2749 2750 columns: t.List[exp.Expression] = [] 2751 for fld in pivot.args["field"].expressions: 2752 field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name 2753 for name in names: 2754 if self.PREFIXED_PIVOT_COLUMNS: 2755 name = f"{name}_{field_name}" if name else field_name 2756 else: 2757 name = f"{field_name}_{name}" if name else field_name 2758 2759 columns.append(exp.to_identifier(name)) 2760 2761 pivot.set("columns", columns) 2762 2763 return pivot 2764 2765 def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]: 2766 return [agg.alias for agg in aggregations] 2767 2768 def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]: 2769 if not skip_where_token and not self._match(TokenType.WHERE): 2770 return None 2771 2772 return self.expression( 2773 exp.Where, comments=self._prev_comments, this=self._parse_conjunction() 2774 ) 2775 2776 def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]: 2777 if not skip_group_by_token and not self._match(TokenType.GROUP_BY): 2778 return None 2779 2780 elements = defaultdict(list) 2781 2782 if self._match(TokenType.ALL): 2783 return self.expression(exp.Group, all=True) 2784 2785 while True: 2786 expressions = self._parse_csv(self._parse_conjunction) 2787 if expressions: 2788 elements["expressions"].extend(expressions) 2789 2790 
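# Each pass through this loop may pick up plain expressions plus any of
# the clauses handled below, so a mixed form like the following folds into
# a single exp.Group (illustrative):
#
#     GROUP BY a, GROUPING SETS ((a), (a, b)), ROLLUP (c), CUBE (d)
#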
grouping_sets = self._parse_grouping_sets() 2791 if grouping_sets: 2792 elements["grouping_sets"].extend(grouping_sets) 2793 2794 rollup = None 2795 cube = None 2796 totals = None 2797 2798 with_ = self._match(TokenType.WITH) 2799 if self._match(TokenType.ROLLUP): 2800 rollup = with_ or self._parse_wrapped_csv(self._parse_column) 2801 elements["rollup"].extend(ensure_list(rollup)) 2802 2803 if self._match(TokenType.CUBE): 2804 cube = with_ or self._parse_wrapped_csv(self._parse_column) 2805 elements["cube"].extend(ensure_list(cube)) 2806 2807 if self._match_text_seq("TOTALS"): 2808 totals = True 2809 elements["totals"] = True # type: ignore 2810 2811 if not (grouping_sets or rollup or cube or totals): 2812 break 2813 2814 return self.expression(exp.Group, **elements) # type: ignore 2815 2816 def _parse_grouping_sets(self) -> t.Optional[t.List[exp.Expression]]: 2817 if not self._match(TokenType.GROUPING_SETS): 2818 return None 2819 2820 return self._parse_wrapped_csv(self._parse_grouping_set) 2821 2822 def _parse_grouping_set(self) -> t.Optional[exp.Expression]: 2823 if self._match(TokenType.L_PAREN): 2824 grouping_set = self._parse_csv(self._parse_column) 2825 self._match_r_paren() 2826 return self.expression(exp.Tuple, expressions=grouping_set) 2827 2828 return self._parse_column() 2829 2830 def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]: 2831 if not skip_having_token and not self._match(TokenType.HAVING): 2832 return None 2833 return self.expression(exp.Having, this=self._parse_conjunction()) 2834 2835 def _parse_qualify(self) -> t.Optional[exp.Qualify]: 2836 if not self._match(TokenType.QUALIFY): 2837 return None 2838 return self.expression(exp.Qualify, this=self._parse_conjunction()) 2839 2840 def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]: 2841 if skip_start_token: 2842 start = None 2843 elif self._match(TokenType.START_WITH): 2844 start = self._parse_conjunction() 2845 else: 2846 return None 2847 2848 self._match(TokenType.CONNECT_BY) 2849 self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression( 2850 exp.Prior, this=self._parse_bitwise() 2851 ) 2852 connect = self._parse_conjunction() 2853 self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR") 2854 return self.expression(exp.Connect, start=start, connect=connect) 2855 2856 def _parse_order( 2857 self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False 2858 ) -> t.Optional[exp.Expression]: 2859 if not skip_order_token and not self._match(TokenType.ORDER_BY): 2860 return this 2861 2862 return self.expression( 2863 exp.Order, this=this, expressions=self._parse_csv(self._parse_ordered) 2864 ) 2865 2866 def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]: 2867 if not self._match(token): 2868 return None 2869 return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered)) 2870 2871 def _parse_ordered(self) -> exp.Ordered: 2872 this = self._parse_conjunction() 2873 self._match(TokenType.ASC) 2874 2875 is_desc = self._match(TokenType.DESC) 2876 is_nulls_first = self._match_text_seq("NULLS", "FIRST") 2877 is_nulls_last = self._match_text_seq("NULLS", "LAST") 2878 desc = is_desc or False 2879 asc = not desc 2880 nulls_first = is_nulls_first or False 2881 explicitly_null_ordered = is_nulls_first or is_nulls_last 2882 2883 if ( 2884 not explicitly_null_ordered 2885 and ( 2886 (asc and self.NULL_ORDERING == "nulls_are_small") 2887 or (desc and self.NULL_ORDERING != "nulls_are_small") 2888 ) 2889 and 
self.NULL_ORDERING != "nulls_are_last" 2890 ): 2891 nulls_first = True 2892 2893 return self.expression(exp.Ordered, this=this, desc=desc, nulls_first=nulls_first) 2894 2895 def _parse_limit( 2896 self, this: t.Optional[exp.Expression] = None, top: bool = False 2897 ) -> t.Optional[exp.Expression]: 2898 if self._match(TokenType.TOP if top else TokenType.LIMIT): 2899 comments = self._prev_comments 2900 if top: 2901 limit_paren = self._match(TokenType.L_PAREN) 2902 expression = self._parse_number() 2903 2904 if limit_paren: 2905 self._match_r_paren() 2906 else: 2907 expression = self._parse_term() 2908 2909 if self._match(TokenType.COMMA): 2910 offset = expression 2911 expression = self._parse_term() 2912 else: 2913 offset = None 2914 2915 limit_exp = self.expression( 2916 exp.Limit, this=this, expression=expression, offset=offset, comments=comments 2917 ) 2918 2919 return limit_exp 2920 2921 if self._match(TokenType.FETCH): 2922 direction = self._match_set((TokenType.FIRST, TokenType.NEXT)) 2923 direction = self._prev.text if direction else "FIRST" 2924 2925 count = self._parse_number() 2926 percent = self._match(TokenType.PERCENT) 2927 2928 self._match_set((TokenType.ROW, TokenType.ROWS)) 2929 2930 only = self._match_text_seq("ONLY") 2931 with_ties = self._match_text_seq("WITH", "TIES") 2932 2933 if only and with_ties: 2934 self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause") 2935 2936 return self.expression( 2937 exp.Fetch, 2938 direction=direction, 2939 count=count, 2940 percent=percent, 2941 with_ties=with_ties, 2942 ) 2943 2944 return this 2945 2946 def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 2947 if not self._match(TokenType.OFFSET): 2948 return this 2949 2950 count = self._parse_term() 2951 self._match_set((TokenType.ROW, TokenType.ROWS)) 2952 return self.expression(exp.Offset, this=this, expression=count) 2953 2954 def _parse_locks(self) -> t.List[exp.Lock]: 2955 locks = [] 2956 while True: 2957 if self._match_text_seq("FOR", "UPDATE"): 2958 update = True 2959 elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq( 2960 "LOCK", "IN", "SHARE", "MODE" 2961 ): 2962 update = False 2963 else: 2964 break 2965 2966 expressions = None 2967 if self._match_text_seq("OF"): 2968 expressions = self._parse_csv(lambda: self._parse_table(schema=True)) 2969 2970 wait: t.Optional[bool | exp.Expression] = None 2971 if self._match_text_seq("NOWAIT"): 2972 wait = True 2973 elif self._match_text_seq("WAIT"): 2974 wait = self._parse_primary() 2975 elif self._match_text_seq("SKIP", "LOCKED"): 2976 wait = False 2977 2978 locks.append( 2979 self.expression(exp.Lock, update=update, expressions=expressions, wait=wait) 2980 ) 2981 2982 return locks 2983 2984 def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 2985 if not self._match_set(self.SET_OPERATIONS): 2986 return this 2987 2988 token_type = self._prev.token_type 2989 2990 if token_type == TokenType.UNION: 2991 expression = exp.Union 2992 elif token_type == TokenType.EXCEPT: 2993 expression = exp.Except 2994 else: 2995 expression = exp.Intersect 2996 2997 return self.expression( 2998 expression, 2999 this=this, 3000 distinct=self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL), 3001 by_name=self._match_text_seq("BY", "NAME"), 3002 expression=self._parse_set_operations(self._parse_select(nested=True)), 3003 ) 3004 3005 def _parse_expression(self) -> t.Optional[exp.Expression]: 3006 return 
self._parse_alias(self._parse_conjunction()) 3007 3008 def _parse_conjunction(self) -> t.Optional[exp.Expression]: 3009 return self._parse_tokens(self._parse_equality, self.CONJUNCTION) 3010 3011 def _parse_equality(self) -> t.Optional[exp.Expression]: 3012 return self._parse_tokens(self._parse_comparison, self.EQUALITY) 3013 3014 def _parse_comparison(self) -> t.Optional[exp.Expression]: 3015 return self._parse_tokens(self._parse_range, self.COMPARISON) 3016 3017 def _parse_range(self) -> t.Optional[exp.Expression]: 3018 this = self._parse_bitwise() 3019 negate = self._match(TokenType.NOT) 3020 3021 if self._match_set(self.RANGE_PARSERS): 3022 expression = self.RANGE_PARSERS[self._prev.token_type](self, this) 3023 if not expression: 3024 return this 3025 3026 this = expression 3027 elif self._match(TokenType.ISNULL): 3028 this = self.expression(exp.Is, this=this, expression=exp.Null()) 3029 3030 # Postgres supports ISNULL and NOTNULL for conditions. 3031 # https://blog.andreiavram.ro/postgresql-null-composite-type/ 3032 if self._match(TokenType.NOTNULL): 3033 this = self.expression(exp.Is, this=this, expression=exp.Null()) 3034 this = self.expression(exp.Not, this=this) 3035 3036 if negate: 3037 this = self.expression(exp.Not, this=this) 3038 3039 if self._match(TokenType.IS): 3040 this = self._parse_is(this) 3041 3042 return this 3043 3044 def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3045 index = self._index - 1 3046 negate = self._match(TokenType.NOT) 3047 3048 if self._match_text_seq("DISTINCT", "FROM"): 3049 klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ 3050 return self.expression(klass, this=this, expression=self._parse_expression()) 3051 3052 expression = self._parse_null() or self._parse_boolean() 3053 if not expression: 3054 self._retreat(index) 3055 return None 3056 3057 this = self.expression(exp.Is, this=this, expression=expression) 3058 return self.expression(exp.Not, this=this) if negate else this 3059 3060 def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In: 3061 unnest = self._parse_unnest(with_alias=False) 3062 if unnest: 3063 this = self.expression(exp.In, this=this, unnest=unnest) 3064 elif self._match(TokenType.L_PAREN): 3065 expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias)) 3066 3067 if len(expressions) == 1 and isinstance(expressions[0], exp.Subqueryable): 3068 this = self.expression(exp.In, this=this, query=expressions[0]) 3069 else: 3070 this = self.expression(exp.In, this=this, expressions=expressions) 3071 3072 self._match_r_paren(this) 3073 else: 3074 this = self.expression(exp.In, this=this, field=self._parse_field()) 3075 3076 return this 3077 3078 def _parse_between(self, this: exp.Expression) -> exp.Between: 3079 low = self._parse_bitwise() 3080 self._match(TokenType.AND) 3081 high = self._parse_bitwise() 3082 return self.expression(exp.Between, this=this, low=low, high=high) 3083 3084 def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3085 if not self._match(TokenType.ESCAPE): 3086 return this 3087 return self.expression(exp.Escape, this=this, expression=self._parse_string()) 3088 3089 def _parse_interval(self) -> t.Optional[exp.Interval]: 3090 index = self._index 3091 3092 if not self._match(TokenType.INTERVAL): 3093 return None 3094 3095 if self._match(TokenType.STRING, advance=False): 3096 this = self._parse_primary() 3097 else: 3098 this = self._parse_term() 3099 3100 if not this: 3101 
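# Nothing that can serve as an interval operand follows, so back out
# and let the caller reparse the INTERVAL keyword as something else,
# e.g. an ordinary identifier.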
self._retreat(index) 3102 return None 3103 3104 unit = self._parse_function() or self._parse_var(any_token=True) 3105 3106 # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse 3107 # each INTERVAL expression into this canonical form so it's easy to transpile 3108 if this and this.is_number: 3109 this = exp.Literal.string(this.name) 3110 elif this and this.is_string: 3111 parts = this.name.split() 3112 3113 if len(parts) == 2: 3114 if unit: 3115 # this is not actually a unit, it's something else 3116 unit = None 3117 self._retreat(self._index - 1) 3118 else: 3119 this = exp.Literal.string(parts[0]) 3120 unit = self.expression(exp.Var, this=parts[1]) 3121 3122 return self.expression(exp.Interval, this=this, unit=unit) 3123 3124 def _parse_bitwise(self) -> t.Optional[exp.Expression]: 3125 this = self._parse_term() 3126 3127 while True: 3128 if self._match_set(self.BITWISE): 3129 this = self.expression( 3130 self.BITWISE[self._prev.token_type], 3131 this=this, 3132 expression=self._parse_term(), 3133 ) 3134 elif self._match(TokenType.DQMARK): 3135 this = self.expression(exp.Coalesce, this=this, expressions=self._parse_term()) 3136 elif self._match_pair(TokenType.LT, TokenType.LT): 3137 this = self.expression( 3138 exp.BitwiseLeftShift, this=this, expression=self._parse_term() 3139 ) 3140 elif self._match_pair(TokenType.GT, TokenType.GT): 3141 this = self.expression( 3142 exp.BitwiseRightShift, this=this, expression=self._parse_term() 3143 ) 3144 else: 3145 break 3146 3147 return this 3148 3149 def _parse_term(self) -> t.Optional[exp.Expression]: 3150 return self._parse_tokens(self._parse_factor, self.TERM) 3151 3152 def _parse_factor(self) -> t.Optional[exp.Expression]: 3153 return self._parse_tokens(self._parse_unary, self.FACTOR) 3154 3155 def _parse_unary(self) -> t.Optional[exp.Expression]: 3156 if self._match_set(self.UNARY_PARSERS): 3157 return self.UNARY_PARSERS[self._prev.token_type](self) 3158 return self._parse_at_time_zone(self._parse_type()) 3159 3160 def _parse_type(self) -> t.Optional[exp.Expression]: 3161 interval = self._parse_interval() 3162 if interval: 3163 return interval 3164 3165 index = self._index 3166 data_type = self._parse_types(check_func=True, allow_identifiers=False) 3167 this = self._parse_column() 3168 3169 if data_type: 3170 if isinstance(this, exp.Literal): 3171 parser = self.TYPE_LITERAL_PARSERS.get(data_type.this) 3172 if parser: 3173 return parser(self, this, data_type) 3174 return self.expression(exp.Cast, this=this, to=data_type) 3175 if not data_type.expressions: 3176 self._retreat(index) 3177 return self._parse_column() 3178 return self._parse_column_ops(data_type) 3179 3180 return this 3181 3182 def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]: 3183 this = self._parse_type() 3184 if not this: 3185 return None 3186 3187 return self.expression( 3188 exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True) 3189 ) 3190 3191 def _parse_types( 3192 self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True 3193 ) -> t.Optional[exp.Expression]: 3194 index = self._index 3195 3196 prefix = self._match_text_seq("SYSUDTLIB", ".") 3197 3198 if not self._match_set(self.TYPE_TOKENS): 3199 identifier = allow_identifiers and self._parse_id_var( 3200 any_token=False, tokens=(TokenType.VAR,) 3201 ) 3202 3203 if identifier: 3204 tokens = self._tokenizer.tokenize(identifier.name) 3205 3206 if len(tokens) != 1: 3207 self.raise_error("Unexpected identifier", self._prev) 3208 3209 if 
tokens[0].token_type in self.TYPE_TOKENS: 3210 self._prev = tokens[0] 3211 elif self.SUPPORTS_USER_DEFINED_TYPES: 3212 return identifier 3213 else: 3214 return None 3215 else: 3216 return None 3217 3218 type_token = self._prev.token_type 3219 3220 if type_token == TokenType.PSEUDO_TYPE: 3221 return self.expression(exp.PseudoType, this=self._prev.text) 3222 3223 nested = type_token in self.NESTED_TYPE_TOKENS 3224 is_struct = type_token in self.STRUCT_TYPE_TOKENS 3225 expressions = None 3226 maybe_func = False 3227 3228 if self._match(TokenType.L_PAREN): 3229 if is_struct: 3230 expressions = self._parse_csv(self._parse_struct_types) 3231 elif nested: 3232 expressions = self._parse_csv( 3233 lambda: self._parse_types( 3234 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 3235 ) 3236 ) 3237 elif type_token in self.ENUM_TYPE_TOKENS: 3238 expressions = self._parse_csv(self._parse_equality) 3239 else: 3240 expressions = self._parse_csv(self._parse_type_size) 3241 3242 if not expressions or not self._match(TokenType.R_PAREN): 3243 self._retreat(index) 3244 return None 3245 3246 maybe_func = True 3247 3248 this: t.Optional[exp.Expression] = None 3249 values: t.Optional[t.List[exp.Expression]] = None 3250 3251 if nested and self._match(TokenType.LT): 3252 if is_struct: 3253 expressions = self._parse_csv(self._parse_struct_types) 3254 else: 3255 expressions = self._parse_csv( 3256 lambda: self._parse_types( 3257 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 3258 ) 3259 ) 3260 3261 if not self._match(TokenType.GT): 3262 self.raise_error("Expecting >") 3263 3264 if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)): 3265 values = self._parse_csv(self._parse_conjunction) 3266 self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN)) 3267 3268 if type_token in self.TIMESTAMPS: 3269 if self._match_text_seq("WITH", "TIME", "ZONE"): 3270 maybe_func = False 3271 tz_type = ( 3272 exp.DataType.Type.TIMETZ 3273 if type_token in self.TIMES 3274 else exp.DataType.Type.TIMESTAMPTZ 3275 ) 3276 this = exp.DataType(this=tz_type, expressions=expressions) 3277 elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"): 3278 maybe_func = False 3279 this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions) 3280 elif self._match_text_seq("WITHOUT", "TIME", "ZONE"): 3281 maybe_func = False 3282 elif type_token == TokenType.INTERVAL: 3283 if self._match_text_seq("YEAR", "TO", "MONTH"): 3284 span: t.Optional[t.List[exp.Expression]] = [exp.IntervalYearToMonthSpan()] 3285 elif self._match_text_seq("DAY", "TO", "SECOND"): 3286 span = [exp.IntervalDayToSecondSpan()] 3287 else: 3288 span = None 3289 3290 unit = not span and self._parse_var() 3291 if not unit: 3292 this = self.expression( 3293 exp.DataType, this=exp.DataType.Type.INTERVAL, expressions=span 3294 ) 3295 else: 3296 this = self.expression(exp.Interval, unit=unit) 3297 3298 if maybe_func and check_func: 3299 index2 = self._index 3300 peek = self._parse_string() 3301 3302 if not peek: 3303 self._retreat(index) 3304 return None 3305 3306 self._retreat(index2) 3307 3308 if not this: 3309 this = exp.DataType( 3310 this=exp.DataType.Type[type_token.value], 3311 expressions=expressions, 3312 nested=nested, 3313 values=values, 3314 prefix=prefix, 3315 ) 3316 3317 while self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET): 3318 this = exp.DataType(this=exp.DataType.Type.ARRAY, expressions=[this], nested=True) 3319 3320 return this 3321 3322 def _parse_struct_types(self) -> 
t.Optional[exp.Expression]: 3323 this = self._parse_type() or self._parse_id_var() 3324 self._match(TokenType.COLON) 3325 return self._parse_column_def(this) 3326 3327 def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3328 if not self._match_text_seq("AT", "TIME", "ZONE"): 3329 return this 3330 return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary()) 3331 3332 def _parse_column(self) -> t.Optional[exp.Expression]: 3333 this = self._parse_field() 3334 if isinstance(this, exp.Identifier): 3335 this = self.expression(exp.Column, this=this) 3336 elif not this: 3337 return self._parse_bracket(this) 3338 return self._parse_column_ops(this) 3339 3340 def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3341 this = self._parse_bracket(this) 3342 3343 while self._match_set(self.COLUMN_OPERATORS): 3344 op_token = self._prev.token_type 3345 op = self.COLUMN_OPERATORS.get(op_token) 3346 3347 if op_token == TokenType.DCOLON: 3348 field = self._parse_types() 3349 if not field: 3350 self.raise_error("Expected type") 3351 elif op and self._curr: 3352 self._advance() 3353 value = self._prev.text 3354 field = ( 3355 exp.Literal.number(value) 3356 if self._prev.token_type == TokenType.NUMBER 3357 else exp.Literal.string(value) 3358 ) 3359 else: 3360 field = self._parse_field(anonymous_func=True, any_token=True) 3361 3362 if isinstance(field, exp.Func): 3363 # bigquery allows function calls like x.y.count(...) 3364 # SAFE.SUBSTR(...) 3365 # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules 3366 this = self._replace_columns_with_dots(this) 3367 3368 if op: 3369 this = op(self, this, field) 3370 elif isinstance(this, exp.Column) and not this.args.get("catalog"): 3371 this = self.expression( 3372 exp.Column, 3373 this=field, 3374 table=this.this, 3375 db=this.args.get("table"), 3376 catalog=this.args.get("db"), 3377 ) 3378 else: 3379 this = self.expression(exp.Dot, this=this, expression=field) 3380 this = self._parse_bracket(this) 3381 return this 3382 3383 def _parse_primary(self) -> t.Optional[exp.Expression]: 3384 if self._match_set(self.PRIMARY_PARSERS): 3385 token_type = self._prev.token_type 3386 primary = self.PRIMARY_PARSERS[token_type](self, self._prev) 3387 3388 if token_type == TokenType.STRING: 3389 expressions = [primary] 3390 while self._match(TokenType.STRING): 3391 expressions.append(exp.Literal.string(self._prev.text)) 3392 3393 if len(expressions) > 1: 3394 return self.expression(exp.Concat, expressions=expressions) 3395 3396 return primary 3397 3398 if self._match_pair(TokenType.DOT, TokenType.NUMBER): 3399 return exp.Literal.number(f"0.{self._prev.text}") 3400 3401 if self._match(TokenType.L_PAREN): 3402 comments = self._prev_comments 3403 query = self._parse_select() 3404 3405 if query: 3406 expressions = [query] 3407 else: 3408 expressions = self._parse_expressions() 3409 3410 this = self._parse_query_modifiers(seq_get(expressions, 0)) 3411 3412 if isinstance(this, exp.Subqueryable): 3413 this = self._parse_set_operations( 3414 self._parse_subquery(this=this, parse_alias=False) 3415 ) 3416 elif len(expressions) > 1: 3417 this = self.expression(exp.Tuple, expressions=expressions) 3418 else: 3419 this = self.expression(exp.Paren, this=self._parse_set_operations(this)) 3420 3421 if this: 3422 this.add_comments(comments) 3423 3424 self._match_r_paren(expression=this) 3425 return this 3426 3427 return None 3428 3429 def _parse_field( 3430 
self, 3431 any_token: bool = False, 3432 tokens: t.Optional[t.Collection[TokenType]] = None, 3433 anonymous_func: bool = False, 3434 ) -> t.Optional[exp.Expression]: 3435 return ( 3436 self._parse_primary() 3437 or self._parse_function(anonymous=anonymous_func) 3438 or self._parse_id_var(any_token=any_token, tokens=tokens) 3439 ) 3440 3441 def _parse_function( 3442 self, 3443 functions: t.Optional[t.Dict[str, t.Callable]] = None, 3444 anonymous: bool = False, 3445 optional_parens: bool = True, 3446 ) -> t.Optional[exp.Expression]: 3447 if not self._curr: 3448 return None 3449 3450 token_type = self._curr.token_type 3451 this = self._curr.text 3452 upper = this.upper() 3453 3454 parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper) 3455 if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS: 3456 self._advance() 3457 return parser(self) 3458 3459 if not self._next or self._next.token_type != TokenType.L_PAREN: 3460 if optional_parens and token_type in self.NO_PAREN_FUNCTIONS: 3461 self._advance() 3462 return self.expression(self.NO_PAREN_FUNCTIONS[token_type]) 3463 3464 return None 3465 3466 if token_type not in self.FUNC_TOKENS: 3467 return None 3468 3469 self._advance(2) 3470 3471 parser = self.FUNCTION_PARSERS.get(upper) 3472 if parser and not anonymous: 3473 this = parser(self) 3474 else: 3475 subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type) 3476 3477 if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH): 3478 this = self.expression(subquery_predicate, this=self._parse_select()) 3479 self._match_r_paren() 3480 return this 3481 3482 if functions is None: 3483 functions = self.FUNCTIONS 3484 3485 function = functions.get(upper) 3486 3487 alias = upper in self.FUNCTIONS_WITH_ALIASED_ARGS 3488 args = self._parse_csv(lambda: self._parse_lambda(alias=alias)) 3489 3490 if function and not anonymous: 3491 func = self.validate_expression(function(args), args) 3492 if not self.NORMALIZE_FUNCTIONS: 3493 func.meta["name"] = this 3494 this = func 3495 else: 3496 this = self.expression(exp.Anonymous, this=this, expressions=args) 3497 3498 self._match_r_paren(this) 3499 return self._parse_window(this) 3500 3501 def _parse_function_parameter(self) -> t.Optional[exp.Expression]: 3502 return self._parse_column_def(self._parse_id_var()) 3503 3504 def _parse_user_defined_function( 3505 self, kind: t.Optional[TokenType] = None 3506 ) -> t.Optional[exp.Expression]: 3507 this = self._parse_id_var() 3508 3509 while self._match(TokenType.DOT): 3510 this = self.expression(exp.Dot, this=this, expression=self._parse_id_var()) 3511 3512 if not self._match(TokenType.L_PAREN): 3513 return this 3514 3515 expressions = self._parse_csv(self._parse_function_parameter) 3516 self._match_r_paren() 3517 return self.expression( 3518 exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True 3519 ) 3520 3521 def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier: 3522 literal = self._parse_primary() 3523 if literal: 3524 return self.expression(exp.Introducer, this=token.text, expression=literal) 3525 3526 return self.expression(exp.Identifier, this=token.text) 3527 3528 def _parse_session_parameter(self) -> exp.SessionParameter: 3529 kind = None 3530 this = self._parse_id_var() or self._parse_primary() 3531 3532 if this and self._match(TokenType.DOT): 3533 kind = this.name 3534 this = self._parse_var() or self._parse_primary() 3535 3536 return self.expression(exp.SessionParameter, this=this, kind=kind) 3537 3538 def 
_parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]: 3539 index = self._index 3540 3541 if self._match(TokenType.L_PAREN): 3542 expressions = t.cast( 3543 t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_id_var) 3544 ) 3545 3546 if not self._match(TokenType.R_PAREN): 3547 self._retreat(index) 3548 else: 3549 expressions = [self._parse_id_var()] 3550 3551 if self._match_set(self.LAMBDAS): 3552 return self.LAMBDAS[self._prev.token_type](self, expressions) 3553 3554 self._retreat(index) 3555 3556 this: t.Optional[exp.Expression] 3557 3558 if self._match(TokenType.DISTINCT): 3559 this = self.expression( 3560 exp.Distinct, expressions=self._parse_csv(self._parse_conjunction) 3561 ) 3562 else: 3563 this = self._parse_select_or_expression(alias=alias) 3564 3565 return self._parse_limit(self._parse_order(self._parse_respect_or_ignore_nulls(this))) 3566 3567 def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 3568 index = self._index 3569 3570 if not self.errors: 3571 try: 3572 if self._parse_select(nested=True): 3573 return this 3574 except ParseError: 3575 pass 3576 finally: 3577 self.errors.clear() 3578 self._retreat(index) 3579 3580 if not self._match(TokenType.L_PAREN): 3581 return this 3582 3583 args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def()) 3584 3585 self._match_r_paren() 3586 return self.expression(exp.Schema, this=this, expressions=args) 3587 3588 def _parse_field_def(self) -> t.Optional[exp.Expression]: 3589 return self._parse_column_def(self._parse_field(any_token=True)) 3590 3591 def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3592 # column defs are not really columns, they're identifiers 3593 if isinstance(this, exp.Column): 3594 this = this.this 3595 3596 kind = self._parse_types(schema=True) 3597 3598 if self._match_text_seq("FOR", "ORDINALITY"): 3599 return self.expression(exp.ColumnDef, this=this, ordinality=True) 3600 3601 constraints: t.List[exp.Expression] = [] 3602 3603 if not kind and self._match(TokenType.ALIAS): 3604 constraints.append( 3605 self.expression( 3606 exp.ComputedColumnConstraint, 3607 this=self._parse_conjunction(), 3608 persisted=self._match_text_seq("PERSISTED"), 3609 not_null=self._match_pair(TokenType.NOT, TokenType.NULL), 3610 ) 3611 ) 3612 3613 while True: 3614 constraint = self._parse_column_constraint() 3615 if not constraint: 3616 break 3617 constraints.append(constraint) 3618 3619 if not kind and not constraints: 3620 return this 3621 3622 return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints) 3623 3624 def _parse_auto_increment( 3625 self, 3626 ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint: 3627 start = None 3628 increment = None 3629 3630 if self._match(TokenType.L_PAREN, advance=False): 3631 args = self._parse_wrapped_csv(self._parse_bitwise) 3632 start = seq_get(args, 0) 3633 increment = seq_get(args, 1) 3634 elif self._match_text_seq("START"): 3635 start = self._parse_bitwise() 3636 self._match_text_seq("INCREMENT") 3637 increment = self._parse_bitwise() 3638 3639 if start and increment: 3640 return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment) 3641 3642 return exp.AutoIncrementColumnConstraint() 3643 3644 def _parse_compress(self) -> exp.CompressColumnConstraint: 3645 if self._match(TokenType.L_PAREN, advance=False): 3646 return self.expression( 3647 exp.CompressColumnConstraint, 
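# (Illustrative) the wrapped branch handles multi-value forms such as
# Teradata-style
#
#     x INT COMPRESS (1, 2, 3)
#
# while the fallthrough below covers a single compressed value: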
this=self._parse_wrapped_csv(self._parse_bitwise) 3648 ) 3649 3650 return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise()) 3651 3652 def _parse_generated_as_identity(self) -> exp.GeneratedAsIdentityColumnConstraint: 3653 if self._match_text_seq("BY", "DEFAULT"): 3654 on_null = self._match_pair(TokenType.ON, TokenType.NULL) 3655 this = self.expression( 3656 exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null 3657 ) 3658 else: 3659 self._match_text_seq("ALWAYS") 3660 this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True) 3661 3662 self._match(TokenType.ALIAS) 3663 identity = self._match_text_seq("IDENTITY") 3664 3665 if self._match(TokenType.L_PAREN): 3666 if self._match(TokenType.START_WITH): 3667 this.set("start", self._parse_bitwise()) 3668 if self._match_text_seq("INCREMENT", "BY"): 3669 this.set("increment", self._parse_bitwise()) 3670 if self._match_text_seq("MINVALUE"): 3671 this.set("minvalue", self._parse_bitwise()) 3672 if self._match_text_seq("MAXVALUE"): 3673 this.set("maxvalue", self._parse_bitwise()) 3674 3675 if self._match_text_seq("CYCLE"): 3676 this.set("cycle", True) 3677 elif self._match_text_seq("NO", "CYCLE"): 3678 this.set("cycle", False) 3679 3680 if not identity: 3681 this.set("expression", self._parse_bitwise()) 3682 3683 self._match_r_paren() 3684 3685 return this 3686 3687 def _parse_inline(self) -> exp.InlineLengthColumnConstraint: 3688 self._match_text_seq("LENGTH") 3689 return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise()) 3690 3691 def _parse_not_constraint( 3692 self, 3693 ) -> t.Optional[exp.Expression]: 3694 if self._match_text_seq("NULL"): 3695 return self.expression(exp.NotNullColumnConstraint) 3696 if self._match_text_seq("CASESPECIFIC"): 3697 return self.expression(exp.CaseSpecificColumnConstraint, not_=True) 3698 if self._match_text_seq("FOR", "REPLICATION"): 3699 return self.expression(exp.NotForReplicationColumnConstraint) 3700 return None 3701 3702 def _parse_column_constraint(self) -> t.Optional[exp.Expression]: 3703 if self._match(TokenType.CONSTRAINT): 3704 this = self._parse_id_var() 3705 else: 3706 this = None 3707 3708 if self._match_texts(self.CONSTRAINT_PARSERS): 3709 return self.expression( 3710 exp.ColumnConstraint, 3711 this=this, 3712 kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self), 3713 ) 3714 3715 return this 3716 3717 def _parse_constraint(self) -> t.Optional[exp.Expression]: 3718 if not self._match(TokenType.CONSTRAINT): 3719 return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS) 3720 3721 this = self._parse_id_var() 3722 expressions = [] 3723 3724 while True: 3725 constraint = self._parse_unnamed_constraint() or self._parse_function() 3726 if not constraint: 3727 break 3728 expressions.append(constraint) 3729 3730 return self.expression(exp.Constraint, this=this, expressions=expressions) 3731 3732 def _parse_unnamed_constraint( 3733 self, constraints: t.Optional[t.Collection[str]] = None 3734 ) -> t.Optional[exp.Expression]: 3735 if not self._match_texts(constraints or self.CONSTRAINT_PARSERS): 3736 return None 3737 3738 constraint = self._prev.text.upper() 3739 if constraint not in self.CONSTRAINT_PARSERS: 3740 self.raise_error(f"No parser found for schema constraint {constraint}.") 3741 3742 return self.CONSTRAINT_PARSERS[constraint](self) 3743 3744 def _parse_unique(self) -> exp.UniqueColumnConstraint: 3745 self._match_text_seq("KEY") 3746 return self.expression( 3747 exp.UniqueColumnConstraint, 
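# (Illustrative) this accepts both a bare column-level UNIQUE and a
# MySQL-style UNIQUE KEY table constraint with an optional name and
# column list, e.g.:
#
#     CREATE TABLE t (a INT UNIQUE, b INT, UNIQUE KEY uq_ab (a, b))
#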
this=self._parse_schema(self._parse_id_var(any_token=False)) 3748 ) 3749 3750 def _parse_key_constraint_options(self) -> t.List[str]: 3751 options = [] 3752 while True: 3753 if not self._curr: 3754 break 3755 3756 if self._match(TokenType.ON): 3757 action = None 3758 on = self._advance_any() and self._prev.text 3759 3760 if self._match_text_seq("NO", "ACTION"): 3761 action = "NO ACTION" 3762 elif self._match_text_seq("CASCADE"): 3763 action = "CASCADE" 3764 elif self._match_pair(TokenType.SET, TokenType.NULL): 3765 action = "SET NULL" 3766 elif self._match_pair(TokenType.SET, TokenType.DEFAULT): 3767 action = "SET DEFAULT" 3768 else: 3769 self.raise_error("Invalid key constraint") 3770 3771 options.append(f"ON {on} {action}") 3772 elif self._match_text_seq("NOT", "ENFORCED"): 3773 options.append("NOT ENFORCED") 3774 elif self._match_text_seq("DEFERRABLE"): 3775 options.append("DEFERRABLE") 3776 elif self._match_text_seq("INITIALLY", "DEFERRED"): 3777 options.append("INITIALLY DEFERRED") 3778 elif self._match_text_seq("NORELY"): 3779 options.append("NORELY") 3780 elif self._match_text_seq("MATCH", "FULL"): 3781 options.append("MATCH FULL") 3782 else: 3783 break 3784 3785 return options 3786 3787 def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]: 3788 if match and not self._match(TokenType.REFERENCES): 3789 return None 3790 3791 expressions = None 3792 this = self._parse_table(schema=True) 3793 options = self._parse_key_constraint_options() 3794 return self.expression(exp.Reference, this=this, expressions=expressions, options=options) 3795 3796 def _parse_foreign_key(self) -> exp.ForeignKey: 3797 expressions = self._parse_wrapped_id_vars() 3798 reference = self._parse_references() 3799 options = {} 3800 3801 while self._match(TokenType.ON): 3802 if not self._match_set((TokenType.DELETE, TokenType.UPDATE)): 3803 self.raise_error("Expected DELETE or UPDATE") 3804 3805 kind = self._prev.text.lower() 3806 3807 if self._match_text_seq("NO", "ACTION"): 3808 action = "NO ACTION" 3809 elif self._match(TokenType.SET): 3810 self._match_set((TokenType.NULL, TokenType.DEFAULT)) 3811 action = "SET " + self._prev.text.upper() 3812 else: 3813 self._advance() 3814 action = self._prev.text.upper() 3815 3816 options[kind] = action 3817 3818 return self.expression( 3819 exp.ForeignKey, expressions=expressions, reference=reference, **options # type: ignore 3820 ) 3821 3822 def _parse_primary_key( 3823 self, wrapped_optional: bool = False, in_props: bool = False 3824 ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey: 3825 desc = ( 3826 self._match_set((TokenType.ASC, TokenType.DESC)) 3827 and self._prev.token_type == TokenType.DESC 3828 ) 3829 3830 if not in_props and not self._match(TokenType.L_PAREN, advance=False): 3831 return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc) 3832 3833 expressions = self._parse_wrapped_csv(self._parse_field, optional=wrapped_optional) 3834 options = self._parse_key_constraint_options() 3835 return self.expression(exp.PrimaryKey, expressions=expressions, options=options) 3836 3837 def _parse_bracket(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3838 if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)): 3839 return this 3840 3841 bracket_kind = self._prev.token_type 3842 3843 if self._match(TokenType.COLON): 3844 expressions: t.List[exp.Expression] = [ 3845 self.expression(exp.Slice, expression=self._parse_conjunction()) 3846 ] 3847 else: 3848 expressions = self._parse_csv( 3849 lambda: 
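# Each element may be a plain expression, an explicitly aliased
# expression, or a slice such as x[1:2]; DuckDB-style {'k': v} struct
# literals also route through here, with ':' parsed as a slice (see the
# struct handling just below):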
    def _parse_bracket(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)):
            return this

        bracket_kind = self._prev.token_type

        if self._match(TokenType.COLON):
            expressions: t.List[exp.Expression] = [
                self.expression(exp.Slice, expression=self._parse_conjunction())
            ]
        else:
            expressions = self._parse_csv(
                lambda: self._parse_slice(
                    self._parse_alias(self._parse_conjunction(), explicit=True)
                )
            )

        # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs
        if bracket_kind == TokenType.L_BRACE:
            this = self.expression(exp.Struct, expressions=expressions)
        elif not this or this.name.upper() == "ARRAY":
            this = self.expression(exp.Array, expressions=expressions)
        else:
            expressions = apply_index_offset(this, expressions, -self.INDEX_OFFSET)
            this = self.expression(exp.Bracket, this=this, expressions=expressions)

        if not self._match(TokenType.R_BRACKET) and bracket_kind == TokenType.L_BRACKET:
            self.raise_error("Expected ]")
        elif not self._match(TokenType.R_BRACE) and bracket_kind == TokenType.L_BRACE:
            self.raise_error("Expected }")

        self._add_comments(this)
        return self._parse_bracket(this)

    def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if self._match(TokenType.COLON):
            return self.expression(exp.Slice, this=this, expression=self._parse_conjunction())
        return this

    def _parse_case(self) -> t.Optional[exp.Expression]:
        ifs = []
        default = None

        comments = self._prev_comments
        expression = self._parse_conjunction()

        while self._match(TokenType.WHEN):
            this = self._parse_conjunction()
            self._match(TokenType.THEN)
            then = self._parse_conjunction()
            ifs.append(self.expression(exp.If, this=this, true=then))

        if self._match(TokenType.ELSE):
            default = self._parse_conjunction()

        if not self._match(TokenType.END):
            self.raise_error("Expected END after CASE", self._prev)

        return self._parse_window(
            self.expression(exp.Case, comments=comments, this=expression, ifs=ifs, default=default)
        )

    def _parse_if(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.L_PAREN):
            args = self._parse_csv(self._parse_conjunction)
            this = self.validate_expression(exp.If.from_arg_list(args), args)
            self._match_r_paren()
        else:
            index = self._index - 1
            condition = self._parse_conjunction()

            if not condition:
                self._retreat(index)
                return None

            self._match(TokenType.THEN)
            true = self._parse_conjunction()
            false = self._parse_conjunction() if self._match(TokenType.ELSE) else None
            self._match(TokenType.END)
            this = self.expression(exp.If, this=condition, true=true, false=false)

        return self._parse_window(this)

    def _parse_next_value_for(self) -> t.Optional[exp.Expression]:
        if not self._match_text_seq("VALUE", "FOR"):
            self._retreat(self._index - 1)
            return None

        return self.expression(
            exp.NextValueFor,
            this=self._parse_column(),
            order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order),
        )

    def _parse_extract(self) -> exp.Extract:
        this = self._parse_function() or self._parse_var() or self._parse_type()

        if self._match(TokenType.FROM):
            return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

        if not self._match(TokenType.COMMA):
            self.raise_error("Expected FROM or comma after EXTRACT", self._prev)

        return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())
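    # Illustrative sketch (editor's example, not from the sqlglot source): bracket
    # parsing is dialect-sensitive. In DuckDB, braces build an exp.Struct, and bracket
    # indexing is normalized with apply_index_offset against the dialect's INDEX_OFFSET.
    #
    #   >>> from sqlglot import exp, parse_one
    #   >>> parse_one("SELECT {'a': 1}", read="duckdb").find(exp.Struct) is not None
    #   True
    #   >>> parse_one("SELECT x[1] FROM t", read="duckdb").sql(dialect="duckdb")
    #   'SELECT x[1] FROM t'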
    def _parse_any_value(self) -> exp.AnyValue:
        this = self._parse_lambda()
        is_max = None
        having = None

        if self._match(TokenType.HAVING):
            self._match_texts(("MAX", "MIN"))
            is_max = self._prev.text == "MAX"
            having = self._parse_column()

        return self.expression(exp.AnyValue, this=this, having=having, max=is_max)

    def _parse_cast(self, strict: bool) -> exp.Expression:
        this = self._parse_conjunction()

        if not self._match(TokenType.ALIAS):
            if self._match(TokenType.COMMA):
                return self.expression(exp.CastToStrType, this=this, to=self._parse_string())

            self.raise_error("Expected AS after CAST")

        fmt = None
        to = self._parse_types()

        if not to:
            self.raise_error("Expected TYPE after CAST")
        elif isinstance(to, exp.Identifier):
            to = exp.DataType.build(to.name, udt=True)
        elif to.this == exp.DataType.Type.CHAR:
            if self._match(TokenType.CHARACTER_SET):
                to = self.expression(exp.CharacterSet, this=self._parse_var_or_string())
        elif self._match(TokenType.FORMAT):
            fmt_string = self._parse_string()
            fmt = self._parse_at_time_zone(fmt_string)

            if to.this in exp.DataType.TEMPORAL_TYPES:
                this = self.expression(
                    exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime,
                    this=this,
                    format=exp.Literal.string(
                        format_time(
                            fmt_string.this if fmt_string else "",
                            self.FORMAT_MAPPING or self.TIME_MAPPING,
                            self.FORMAT_TRIE or self.TIME_TRIE,
                        )
                    ),
                )

                if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime):
                    this.set("zone", fmt.args["zone"])

                return this

        return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, format=fmt)

    def _parse_concat(self) -> t.Optional[exp.Expression]:
        args = self._parse_csv(self._parse_conjunction)
        if self.CONCAT_NULL_OUTPUTS_STRING:
            args = [
                exp.func("COALESCE", exp.cast(arg, "text"), exp.Literal.string(""))
                for arg in args
                if arg
            ]

        # Some dialects (e.g. Trino) don't allow a single-argument CONCAT call, so when
        # we find such a call we replace it with its argument.
        if len(args) == 1:
            return args[0]

        return self.expression(
            exp.Concat if self.STRICT_STRING_CONCAT else exp.SafeConcat, expressions=args
        )
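    # Illustrative sketch (editor's example, not from the sqlglot source; output is
    # version- and dialect-dependent): dialects with a FORMAT clause on CAST, e.g.
    # Teradata, take the temporal branch above, so the cast comes out as a
    # StrToDate/StrToTime with a normalized format string.
    #
    #   >>> from sqlglot import parse_one
    #   >>> parse_one("SELECT CAST(x AS DATE FORMAT 'YYYY-MM-DD')", read="teradata").sql()
    #   "SELECT STR_TO_DATE(x, '%Y-%m-%d')"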
    def _parse_string_agg(self) -> exp.Expression:
        if self._match(TokenType.DISTINCT):
            args: t.List[t.Optional[exp.Expression]] = [
                self.expression(exp.Distinct, expressions=[self._parse_conjunction()])
            ]
            if self._match(TokenType.COMMA):
                args.extend(self._parse_csv(self._parse_conjunction))
        else:
            args = self._parse_csv(self._parse_conjunction)  # type: ignore

        index = self._index
        if not self._match(TokenType.R_PAREN) and args:
            # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]])
            # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n])
            args[-1] = self._parse_limit(this=self._parse_order(this=args[-1]))
            return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1))

        # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]).
        # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that
        # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them.
        if not self._match_text_seq("WITHIN", "GROUP"):
            self._retreat(index)
            return self.validate_expression(exp.GroupConcat.from_arg_list(args), args)

        self._match_l_paren()  # The corresponding match_r_paren will be called in parse_function (caller)
        order = self._parse_order(this=seq_get(args, 0))
        return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1))

    def _parse_convert(self, strict: bool) -> t.Optional[exp.Expression]:
        this = self._parse_bitwise()

        if self._match(TokenType.USING):
            to: t.Optional[exp.Expression] = self.expression(
                exp.CharacterSet, this=self._parse_var()
            )
        elif self._match(TokenType.COMMA):
            to = self._parse_types()
        else:
            to = None

        return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to)

    def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]:
        """
        There are generally two variants of the DECODE function:

        - DECODE(bin, charset)
        - DECODE(expression, search, result [, search, result] ... [, default])

        The second variant will always be parsed into a CASE expression. Note that NULL
        needs special treatment, since we need to explicitly check for it with `IS NULL`,
        instead of relying on pattern matching.
        """
        args = self._parse_csv(self._parse_conjunction)

        if len(args) < 3:
            return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1))

        expression, *expressions = args
        if not expression:
            return None

        ifs = []
        for search, result in zip(expressions[::2], expressions[1::2]):
            if not search or not result:
                return None

            if isinstance(search, exp.Literal):
                ifs.append(
                    exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result)
                )
            elif isinstance(search, exp.Null):
                ifs.append(
                    exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result)
                )
            else:
                cond = exp.or_(
                    exp.EQ(this=expression.copy(), expression=search),
                    exp.and_(
                        exp.Is(this=expression.copy(), expression=exp.Null()),
                        exp.Is(this=search.copy(), expression=exp.Null()),
                        copy=False,
                    ),
                    copy=False,
                )
                ifs.append(exp.If(this=cond, true=result))

        return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None)
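    # Illustrative sketch (editor's example, not from the sqlglot source): because the
    # multi-argument DECODE is rewritten into CASE at parse time, transpiling it out of
    # Oracle needs no generator support.
    #
    #   >>> import sqlglot
    #   >>> sqlglot.transpile("SELECT DECODE(a, 1, 'one', 'other') FROM t", read="oracle")[0]
    #   "SELECT CASE WHEN a = 1 THEN 'one' ELSE 'other' END FROM t"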
"JSON") 4135 encoding = self._match_text_seq("ENCODING") and self._parse_var() 4136 4137 return self.expression( 4138 exp.JSONObject, 4139 expressions=expressions, 4140 null_handling=null_handling, 4141 unique_keys=unique_keys, 4142 return_type=return_type, 4143 format_json=format_json, 4144 encoding=encoding, 4145 ) 4146 4147 def _parse_logarithm(self) -> exp.Func: 4148 # Default argument order is base, expression 4149 args = self._parse_csv(self._parse_range) 4150 4151 if len(args) > 1: 4152 if not self.LOG_BASE_FIRST: 4153 args.reverse() 4154 return exp.Log.from_arg_list(args) 4155 4156 return self.expression( 4157 exp.Ln if self.LOG_DEFAULTS_TO_LN else exp.Log, this=seq_get(args, 0) 4158 ) 4159 4160 def _parse_match_against(self) -> exp.MatchAgainst: 4161 expressions = self._parse_csv(self._parse_column) 4162 4163 self._match_text_seq(")", "AGAINST", "(") 4164 4165 this = self._parse_string() 4166 4167 if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"): 4168 modifier = "IN NATURAL LANGUAGE MODE" 4169 if self._match_text_seq("WITH", "QUERY", "EXPANSION"): 4170 modifier = f"{modifier} WITH QUERY EXPANSION" 4171 elif self._match_text_seq("IN", "BOOLEAN", "MODE"): 4172 modifier = "IN BOOLEAN MODE" 4173 elif self._match_text_seq("WITH", "QUERY", "EXPANSION"): 4174 modifier = "WITH QUERY EXPANSION" 4175 else: 4176 modifier = None 4177 4178 return self.expression( 4179 exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier 4180 ) 4181 4182 # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16 4183 def _parse_open_json(self) -> exp.OpenJSON: 4184 this = self._parse_bitwise() 4185 path = self._match(TokenType.COMMA) and self._parse_string() 4186 4187 def _parse_open_json_column_def() -> exp.OpenJSONColumnDef: 4188 this = self._parse_field(any_token=True) 4189 kind = self._parse_types() 4190 path = self._parse_string() 4191 as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON) 4192 4193 return self.expression( 4194 exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json 4195 ) 4196 4197 expressions = None 4198 if self._match_pair(TokenType.R_PAREN, TokenType.WITH): 4199 self._match_l_paren() 4200 expressions = self._parse_csv(_parse_open_json_column_def) 4201 4202 return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions) 4203 4204 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 4205 args = self._parse_csv(self._parse_bitwise) 4206 4207 if self._match(TokenType.IN): 4208 return self.expression( 4209 exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0) 4210 ) 4211 4212 if haystack_first: 4213 haystack = seq_get(args, 0) 4214 needle = seq_get(args, 1) 4215 else: 4216 needle = seq_get(args, 0) 4217 haystack = seq_get(args, 1) 4218 4219 return self.expression( 4220 exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2) 4221 ) 4222 4223 def _parse_join_hint(self, func_name: str) -> exp.JoinHint: 4224 args = self._parse_csv(self._parse_table) 4225 return exp.JoinHint(this=func_name.upper(), expressions=args) 4226 4227 def _parse_substring(self) -> exp.Substring: 4228 # Postgres supports the form: substring(string [from int] [for int]) 4229 # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6 4230 4231 args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise)) 4232 4233 if self._match(TokenType.FROM): 4234 args.append(self._parse_bitwise()) 4235 if self._match(TokenType.FOR): 
    def _parse_substring(self) -> exp.Substring:
        # Postgres supports the form: substring(string [from int] [for int])
        # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6

        args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise))

        if self._match(TokenType.FROM):
            args.append(self._parse_bitwise())
            if self._match(TokenType.FOR):
                args.append(self._parse_bitwise())

        return self.validate_expression(exp.Substring.from_arg_list(args), args)

    def _parse_trim(self) -> exp.Trim:
        # https://www.w3resource.com/sql/character-functions/trim.php
        # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html

        position = None
        collation = None

        if self._match_texts(self.TRIM_TYPES):
            position = self._prev.text.upper()

        expression = self._parse_bitwise()
        if self._match_set((TokenType.FROM, TokenType.COMMA)):
            this = self._parse_bitwise()
        else:
            this = expression
            expression = None

        if self._match(TokenType.COLLATE):
            collation = self._parse_bitwise()

        return self.expression(
            exp.Trim, this=this, position=position, expression=expression, collation=collation
        )

    def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]:
        return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window)

    def _parse_named_window(self) -> t.Optional[exp.Expression]:
        return self._parse_window(self._parse_id_var(), alias=True)

    def _parse_respect_or_ignore_nulls(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        if self._match_text_seq("IGNORE", "NULLS"):
            return self.expression(exp.IgnoreNulls, this=this)
        if self._match_text_seq("RESPECT", "NULLS"):
            return self.expression(exp.RespectNulls, this=this)
        return this

    def _parse_window(
        self, this: t.Optional[exp.Expression], alias: bool = False
    ) -> t.Optional[exp.Expression]:
        if self._match_pair(TokenType.FILTER, TokenType.L_PAREN):
            self._match(TokenType.WHERE)
            this = self.expression(
                exp.Filter, this=this, expression=self._parse_where(skip_where_token=True)
            )
            self._match_r_paren()

        # T-SQL allows the OVER (...) syntax after WITHIN GROUP.
        # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16
        if self._match_text_seq("WITHIN", "GROUP"):
            order = self._parse_wrapped(self._parse_order)
            this = self.expression(exp.WithinGroup, this=this, expression=order)

        # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER
        # Some dialects choose to implement and some do not.
        # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html

        # There is some code above in _parse_lambda that handles
        # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ...

        # The below changes handle
        # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ...

        # Oracle allows both formats
        # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html)
        # and Snowflake chose to do the same for familiarity
        # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes
        this = self._parse_respect_or_ignore_nulls(this)

        # bigquery select from window x AS (partition by ...)
        if alias:
            over = None
            self._match(TokenType.ALIAS)
        elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS):
            return this
        else:
            over = self._prev.text.upper()

        if not self._match(TokenType.L_PAREN):
            return self.expression(
                exp.Window, this=this, alias=self._parse_id_var(False), over=over
            )

        window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS)

        first = self._match(TokenType.FIRST)
        if self._match_text_seq("LAST"):
            first = False

        partition, order = self._parse_partition_and_order()
        kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text

        if kind:
            self._match(TokenType.BETWEEN)
            start = self._parse_window_spec()
            self._match(TokenType.AND)
            end = self._parse_window_spec()

            spec = self.expression(
                exp.WindowSpec,
                kind=kind,
                start=start["value"],
                start_side=start["side"],
                end=end["value"],
                end_side=end["side"],
            )
        else:
            spec = None

        self._match_r_paren()

        window = self.expression(
            exp.Window,
            this=this,
            partition_by=partition,
            order=order,
            spec=spec,
            alias=window_alias,
            over=over,
            first=first,
        )

        # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...)
        if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False):
            return self._parse_window(window, alias=alias)

        return window

    def _parse_partition_and_order(
        self,
    ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]:
        return self._parse_partition_by(), self._parse_order()

    def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]:
        self._match(TokenType.BETWEEN)

        return {
            "value": (
                (self._match_text_seq("UNBOUNDED") and "UNBOUNDED")
                or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW")
                or self._parse_bitwise()
            ),
            "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text,
        }

    def _parse_alias(
        self, this: t.Optional[exp.Expression], explicit: bool = False
    ) -> t.Optional[exp.Expression]:
        any_token = self._match(TokenType.ALIAS)

        if explicit and not any_token:
            return this

        if self._match(TokenType.L_PAREN):
            aliases = self.expression(
                exp.Aliases,
                this=this,
                expressions=self._parse_csv(lambda: self._parse_id_var(any_token)),
            )
            self._match_r_paren(aliases)
            return aliases

        alias = self._parse_id_var(any_token)

        if alias:
            return self.expression(exp.Alias, this=this, alias=alias)

        return this
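    # Illustrative sketch (editor's example, not from the sqlglot source): the pieces
    # parsed above land in exp.Window, with the frame captured as an exp.WindowSpec.
    #
    #   >>> from sqlglot import exp, parse_one
    #   >>> sql = "SELECT SUM(x) OVER (PARTITION BY y ORDER BY z ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) FROM t"
    #   >>> spec = parse_one(sql).find(exp.Window).args["spec"]
    #   >>> spec.args["kind"], spec.args["start"], spec.args["end"]
    #   ('ROWS', 'UNBOUNDED', 'CURRENT ROW')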
    def _parse_id_var(
        self,
        any_token: bool = True,
        tokens: t.Optional[t.Collection[TokenType]] = None,
    ) -> t.Optional[exp.Expression]:
        identifier = self._parse_identifier()

        if identifier:
            return identifier

        if (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS):
            quoted = self._prev.token_type == TokenType.STRING
            return exp.Identifier(this=self._prev.text, quoted=quoted)

        return None

    def _parse_string(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.STRING):
            return self.PRIMARY_PARSERS[TokenType.STRING](self, self._prev)
        return self._parse_placeholder()

    def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]:
        return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True)

    def _parse_number(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.NUMBER):
            return self.PRIMARY_PARSERS[TokenType.NUMBER](self, self._prev)
        return self._parse_placeholder()

    def _parse_identifier(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.IDENTIFIER):
            return self.expression(exp.Identifier, this=self._prev.text, quoted=True)
        return self._parse_placeholder()

    def _parse_var(
        self, any_token: bool = False, tokens: t.Optional[t.Collection[TokenType]] = None
    ) -> t.Optional[exp.Expression]:
        if (
            (any_token and self._advance_any())
            or self._match(TokenType.VAR)
            or (self._match_set(tokens) if tokens else False)
        ):
            return self.expression(exp.Var, this=self._prev.text)
        return self._parse_placeholder()

    def _advance_any(self) -> t.Optional[Token]:
        if self._curr and self._curr.token_type not in self.RESERVED_KEYWORDS:
            self._advance()
            return self._prev
        return None

    def _parse_var_or_string(self) -> t.Optional[exp.Expression]:
        return self._parse_var() or self._parse_string()

    def _parse_null(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.NULL):
            return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev)
        return self._parse_placeholder()

    def _parse_boolean(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.TRUE):
            return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev)
        if self._match(TokenType.FALSE):
            return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev)
        return self._parse_placeholder()

    def _parse_star(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.STAR):
            return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev)
        return self._parse_placeholder()

    def _parse_parameter(self) -> exp.Parameter:
        wrapped = self._match(TokenType.L_BRACE)
        this = self._parse_var() or self._parse_identifier() or self._parse_primary()
        self._match(TokenType.R_BRACE)
        return self.expression(exp.Parameter, this=this, wrapped=wrapped)

    def _parse_placeholder(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.PLACEHOLDER_PARSERS):
            placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self)
            if placeholder:
                return placeholder
            self._advance(-1)
        return None

    def _parse_except(self) -> t.Optional[t.List[exp.Expression]]:
        if not self._match(TokenType.EXCEPT):
            return None
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_column)
        return self._parse_csv(self._parse_column)

    def _parse_replace(self) -> t.Optional[t.List[exp.Expression]]:
        if not self._match(TokenType.REPLACE):
            return None
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_expression)
        return self._parse_expressions()

    def _parse_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA
    ) -> t.List[exp.Expression]:
        parse_result = parse_method()
        items = [parse_result] if parse_result is not None else []

        while self._match(sep):
            self._add_comments(parse_result)
            parse_result = parse_method()
            if parse_result is not None:
                items.append(parse_result)

        return items
    def _parse_tokens(
        self, parse_method: t.Callable, expressions: t.Dict
    ) -> t.Optional[exp.Expression]:
        this = parse_method()

        while self._match_set(expressions):
            this = self.expression(
                expressions[self._prev.token_type],
                this=this,
                comments=self._prev_comments,
                expression=parse_method(),
            )

        return this

    def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]:
        return self._parse_wrapped_csv(self._parse_id_var, optional=optional)

    def _parse_wrapped_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False
    ) -> t.List[exp.Expression]:
        return self._parse_wrapped(
            lambda: self._parse_csv(parse_method, sep=sep), optional=optional
        )

    def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any:
        wrapped = self._match(TokenType.L_PAREN)
        if not wrapped and not optional:
            self.raise_error("Expecting (")
        parse_result = parse_method()
        if wrapped:
            self._match_r_paren()
        return parse_result

    def _parse_expressions(self) -> t.List[exp.Expression]:
        return self._parse_csv(self._parse_expression)

    def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]:
        return self._parse_select() or self._parse_set_operations(
            self._parse_expression() if alias else self._parse_conjunction()
        )

    def _parse_ddl_select(self) -> t.Optional[exp.Expression]:
        return self._parse_query_modifiers(
            self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False))
        )

    def _parse_transaction(self) -> exp.Transaction | exp.Command:
        this = None
        if self._match_texts(self.TRANSACTION_KIND):
            this = self._prev.text

        self._match_texts({"TRANSACTION", "WORK"})

        modes = []
        while True:
            mode = []
            while self._match(TokenType.VAR):
                mode.append(self._prev.text)

            if mode:
                modes.append(" ".join(mode))
            if not self._match(TokenType.COMMA):
                break

        return self.expression(exp.Transaction, this=this, modes=modes)

    def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback:
        chain = None
        savepoint = None
        is_rollback = self._prev.token_type == TokenType.ROLLBACK

        self._match_texts({"TRANSACTION", "WORK"})

        if self._match_text_seq("TO"):
            self._match_text_seq("SAVEPOINT")
            savepoint = self._parse_id_var()

        if self._match(TokenType.AND):
            chain = not self._match_text_seq("NO")
            self._match_text_seq("CHAIN")

        if is_rollback:
            return self.expression(exp.Rollback, savepoint=savepoint)

        return self.expression(exp.Commit, chain=chain)

    def _parse_add_column(self) -> t.Optional[exp.Expression]:
        if not self._match_text_seq("ADD"):
            return None

        self._match(TokenType.COLUMN)
        exists_column = self._parse_exists(not_=True)
        expression = self._parse_field_def()

        if expression:
            expression.set("exists", exists_column)

        # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns
        if self._match_texts(("FIRST", "AFTER")):
            position = self._prev.text
            column_position = self.expression(
                exp.ColumnPosition, this=self._parse_column(), position=position
            )
            expression.set("position", column_position)

        return expression
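    # Illustrative sketch (editor's example, not from the sqlglot source): the
    # transaction helpers above back the BEGIN / COMMIT / ROLLBACK statement parsers.
    #
    #   >>> from sqlglot import parse_one
    #   >>> parse_one("COMMIT AND CHAIN").args.get("chain")
    #   True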
    def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]:
        drop = self._match(TokenType.DROP) and self._parse_drop()
        if drop and not isinstance(drop, exp.Command):
            drop.set("kind", drop.args.get("kind", "COLUMN"))
        return drop

    # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html
    def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition:
        return self.expression(
            exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists
        )

    def _parse_add_constraint(self) -> exp.AddConstraint:
        this = None
        kind = self._prev.token_type

        if kind == TokenType.CONSTRAINT:
            this = self._parse_id_var()

            if self._match_text_seq("CHECK"):
                expression = self._parse_wrapped(self._parse_conjunction)
                enforced = self._match_text_seq("ENFORCED")

                return self.expression(
                    exp.AddConstraint, this=this, expression=expression, enforced=enforced
                )

        if kind == TokenType.FOREIGN_KEY or self._match(TokenType.FOREIGN_KEY):
            expression = self._parse_foreign_key()
        elif kind == TokenType.PRIMARY_KEY or self._match(TokenType.PRIMARY_KEY):
            expression = self._parse_primary_key()
        else:
            expression = None

        return self.expression(exp.AddConstraint, this=this, expression=expression)

    def _parse_alter_table_add(self) -> t.List[exp.Expression]:
        index = self._index - 1

        if self._match_set(self.ADD_CONSTRAINT_TOKENS):
            return self._parse_csv(self._parse_add_constraint)

        self._retreat(index)
        return self._parse_csv(self._parse_add_column)

    def _parse_alter_table_alter(self) -> exp.AlterColumn:
        self._match(TokenType.COLUMN)
        column = self._parse_field(any_token=True)

        if self._match_pair(TokenType.DROP, TokenType.DEFAULT):
            return self.expression(exp.AlterColumn, this=column, drop=True)
        if self._match_pair(TokenType.SET, TokenType.DEFAULT):
            return self.expression(exp.AlterColumn, this=column, default=self._parse_conjunction())

        self._match_text_seq("SET", "DATA")
        return self.expression(
            exp.AlterColumn,
            this=column,
            dtype=self._match_text_seq("TYPE") and self._parse_types(),
            collate=self._match(TokenType.COLLATE) and self._parse_term(),
            using=self._match(TokenType.USING) and self._parse_conjunction(),
        )

    def _parse_alter_table_drop(self) -> t.List[exp.Expression]:
        index = self._index - 1

        partition_exists = self._parse_exists()
        if self._match(TokenType.PARTITION, advance=False):
            return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists))

        self._retreat(index)
        return self._parse_csv(self._parse_drop_column)

    def _parse_alter_table_rename(self) -> exp.RenameTable:
        self._match_text_seq("TO")
        return self.expression(exp.RenameTable, this=self._parse_table(schema=True))
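    # Illustrative sketch (editor's example, not from the sqlglot source): ALTER
    # actions are dispatched through ALTER_PARSERS on the keyword after the table
    # name (ADD, ALTER, DROP, RENAME); anything unrecognized falls back to exp.Command.
    #
    #   >>> from sqlglot import parse_one
    #   >>> ast = parse_one("ALTER TABLE t ADD COLUMN c INT")
    #   >>> [type(action).__name__ for action in ast.args["actions"]]
    #   ['ColumnDef']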
    def _parse_alter(self) -> exp.AlterTable | exp.Command:
        start = self._prev

        if not self._match(TokenType.TABLE):
            return self._parse_as_command(start)

        exists = self._parse_exists()
        this = self._parse_table(schema=True)

        if self._next:
            self._advance()

        parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None
        if parser:
            actions = ensure_list(parser(self))

            if not self._curr:
                return self.expression(
                    exp.AlterTable,
                    this=this,
                    exists=exists,
                    actions=actions,
                )
        return self._parse_as_command(start)

    def _parse_merge(self) -> exp.Merge:
        self._match(TokenType.INTO)
        target = self._parse_table()

        if target and self._match(TokenType.ALIAS, advance=False):
            target.set("alias", self._parse_table_alias())

        self._match(TokenType.USING)
        using = self._parse_table()

        self._match(TokenType.ON)
        on = self._parse_conjunction()

        whens = []
        while self._match(TokenType.WHEN):
            matched = not self._match(TokenType.NOT)
            self._match_text_seq("MATCHED")
            source = (
                False
                if self._match_text_seq("BY", "TARGET")
                else self._match_text_seq("BY", "SOURCE")
            )
            condition = self._parse_conjunction() if self._match(TokenType.AND) else None

            self._match(TokenType.THEN)

            if self._match(TokenType.INSERT):
                _this = self._parse_star()
                if _this:
                    then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=_this)
                else:
                    then = self.expression(
                        exp.Insert,
                        this=self._parse_value(),
                        expression=self._match(TokenType.VALUES) and self._parse_value(),
                    )
            elif self._match(TokenType.UPDATE):
                expressions = self._parse_star()
                if expressions:
                    then = self.expression(exp.Update, expressions=expressions)
                else:
                    then = self.expression(
                        exp.Update,
                        expressions=self._match(TokenType.SET)
                        and self._parse_csv(self._parse_equality),
                    )
            elif self._match(TokenType.DELETE):
                then = self.expression(exp.Var, this=self._prev.text)
            else:
                then = None

            whens.append(
                self.expression(
                    exp.When,
                    matched=matched,
                    source=source,
                    condition=condition,
                    then=then,
                )
            )

        return self.expression(
            exp.Merge,
            this=target,
            using=using,
            on=on,
            expressions=whens,
        )

    def _parse_show(self) -> t.Optional[exp.Expression]:
        parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE)
        if parser:
            return parser(self)
        self._advance()
        return self.expression(exp.Show, this=self._prev.text.upper())

    def _parse_set_item_assignment(
        self, kind: t.Optional[str] = None
    ) -> t.Optional[exp.Expression]:
        index = self._index

        if kind in {"GLOBAL", "SESSION"} and self._match_text_seq("TRANSACTION"):
            return self._parse_set_transaction(global_=kind == "GLOBAL")

        left = self._parse_primary() or self._parse_id_var()

        if not self._match_texts(("=", "TO")):
            self._retreat(index)
            return None

        right = self._parse_statement() or self._parse_id_var()
        this = self.expression(exp.EQ, this=left, expression=right)

        return self.expression(exp.SetItem, this=this, kind=kind)

    def _parse_set_transaction(self, global_: bool = False) -> exp.Expression:
        self._match_text_seq("TRANSACTION")
        characteristics = self._parse_csv(
            lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS)
        )
        return self.expression(
            exp.SetItem,
            expressions=characteristics,
            kind="TRANSACTION",
            **{"global": global_},  # type: ignore
        )

    def _parse_set_item(self) -> t.Optional[exp.Expression]:
        parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE)
        return parser(self) if parser else self._parse_set_item_assignment(kind=None)
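    # Illustrative sketch (editor's example, not from the sqlglot source): each WHEN
    # branch becomes an exp.When node on the resulting exp.Merge.
    #
    #   >>> from sqlglot import parse_one
    #   >>> sql = (
    #   ...     "MERGE INTO t USING s ON t.id = s.id "
    #   ...     "WHEN MATCHED THEN UPDATE SET t.v = s.v "
    #   ...     "WHEN NOT MATCHED THEN INSERT (id, v) VALUES (s.id, s.v)"
    #   ... )
    #   >>> len(parse_one(sql).expressions)
    #   2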
    def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command:
        index = self._index
        set_ = self.expression(
            exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag
        )

        if self._curr:
            self._retreat(index)
            return self._parse_as_command(self._prev)

        return set_

    def _parse_var_from_options(self, options: t.Collection[str]) -> t.Optional[exp.Var]:
        for option in options:
            if self._match_text_seq(*option.split(" ")):
                return exp.var(option)
        return None

    def _parse_as_command(self, start: Token) -> exp.Command:
        while self._curr:
            self._advance()
        text = self._find_sql(start, self._prev)
        size = len(start.text)
        return exp.Command(this=text[:size], expression=text[size:])

    def _parse_dict_property(self, this: str) -> exp.DictProperty:
        settings = []

        self._match_l_paren()
        kind = self._parse_id_var()

        if self._match(TokenType.L_PAREN):
            while True:
                key = self._parse_id_var()
                value = self._parse_primary()

                if not key and value is None:
                    break
                settings.append(self.expression(exp.DictSubProperty, this=key, value=value))
            self._match(TokenType.R_PAREN)

        self._match_r_paren()

        return self.expression(
            exp.DictProperty,
            this=this,
            kind=kind.this if kind else None,
            settings=settings,
        )

    def _parse_dict_range(self, this: str) -> exp.DictRange:
        self._match_l_paren()
        has_min = self._match_text_seq("MIN")
        if has_min:
            min = self._parse_var() or self._parse_primary()
            self._match_text_seq("MAX")
            max = self._parse_var() or self._parse_primary()
        else:
            max = self._parse_var() or self._parse_primary()
            min = exp.Literal.number(0)
        self._match_r_paren()
        return self.expression(exp.DictRange, this=this, min=min, max=max)

    def _parse_comprehension(self, this: exp.Expression) -> exp.Comprehension:
        expression = self._parse_column()
        self._match(TokenType.IN)
        iterator = self._parse_column()
        condition = self._parse_conjunction() if self._match_text_seq("IF") else None
        return self.expression(
            exp.Comprehension,
            this=this,
            expression=expression,
            iterator=iterator,
            condition=condition,
        )

    def _find_parser(
        self, parsers: t.Dict[str, t.Callable], trie: t.Dict
    ) -> t.Optional[t.Callable]:
        if not self._curr:
            return None

        index = self._index
        this = []
        while True:
            # The current token might be multiple words
            curr = self._curr.text.upper()
            key = curr.split(" ")
            this.append(curr)

            self._advance()
            result, trie = in_trie(trie, key)
            if result == TrieResult.FAILED:
                break

            if result == TrieResult.EXISTS:
                subparser = parsers[" ".join(this)]
                return subparser

        self._retreat(index)
        return None
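    # Illustrative sketch (editor's example, not from the sqlglot source): _find_parser
    # walks the tries the metaclass builds from SHOW_PARSERS / SET_PARSERS, so
    # multi-word keys are matched token by token.
    #
    #   >>> from sqlglot.trie import TrieResult, in_trie, new_trie
    #   >>> trie = new_trie(key.split(" ") for key in ("SHOW TABLES", "SHOW COLUMNS"))
    #   >>> in_trie(trie, ["SHOW"])[0]            # TrieResult.PREFIX: keep consuming tokens
    #   >>> in_trie(trie, ["SHOW", "TABLES"])[0]  # TrieResult.EXISTS: a parser key matched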
    def _match(self, token_type, advance=True, expression=None):
        if not self._curr:
            return None

        if self._curr.token_type == token_type:
            if advance:
                self._advance()
            self._add_comments(expression)
            return True

        return None

    def _match_set(self, types, advance=True):
        if not self._curr:
            return None

        if self._curr.token_type in types:
            if advance:
                self._advance()
            return True

        return None

    def _match_pair(self, token_type_a, token_type_b, advance=True):
        if not self._curr or not self._next:
            return None

        if self._curr.token_type == token_type_a and self._next.token_type == token_type_b:
            if advance:
                self._advance(2)
            return True

        return None

    def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None:
        if not self._match(TokenType.L_PAREN, expression=expression):
            self.raise_error("Expecting (")

    def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None:
        if not self._match(TokenType.R_PAREN, expression=expression):
            self.raise_error("Expecting )")

    def _match_texts(self, texts, advance=True):
        if self._curr and self._curr.text.upper() in texts:
            if advance:
                self._advance()
            return True
        return False

    def _match_text_seq(self, *texts, advance=True):
        index = self._index
        for text in texts:
            if self._curr and self._curr.text.upper() == text:
                self._advance()
            else:
                self._retreat(index)
                return False

        if not advance:
            self._retreat(index)

        return True

    @t.overload
    def _replace_columns_with_dots(self, this: exp.Expression) -> exp.Expression:
        ...

    @t.overload
    def _replace_columns_with_dots(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        ...

    def _replace_columns_with_dots(self, this):
        if isinstance(this, exp.Dot):
            exp.replace_children(this, self._replace_columns_with_dots)
        elif isinstance(this, exp.Column):
            exp.replace_children(this, self._replace_columns_with_dots)
            table = this.args.get("table")
            this = (
                self.expression(exp.Dot, this=table, expression=this.this) if table else this.this
            )

        return this

    def _replace_lambda(
        self, node: t.Optional[exp.Expression], lambda_variables: t.Set[str]
    ) -> t.Optional[exp.Expression]:
        if not node:
            return node

        for column in node.find_all(exp.Column):
            if column.parts[0].name in lambda_variables:
                dot_or_id = column.to_dot() if column.table else column.this
                parent = column.parent

                while isinstance(parent, exp.Dot):
                    if not isinstance(parent.parent, exp.Dot):
                        parent.replace(dot_or_id)
                        break
                    parent = parent.parent
                else:
                    if column is node:
                        node = dot_or_id
                    else:
                        column.replace(dot_or_id)
        return node
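A quick illustration of `_replace_lambda` from the public API (a sketch, not from the sqlglot source; Spark's higher-order FILTER is used here, and exact node shapes may vary by version): in `x -> x > 0`, the lambda variable `x` is rewritten from an `exp.Column` into a bare identifier, so it cannot later be qualified like an ordinary column reference.

    from sqlglot import exp, parse_one

    lam = parse_one("SELECT FILTER(arr, x -> x > 0) FROM t", read="spark").find(exp.Lambda)
    print(lam.sql(dialect="spark"))  # x -> x > 0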
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: Determines the amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
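For example, a standalone parser with relaxed error handling (a sketch; dialect-specific Parser subclasses are normally constructed for you by the top-level sqlglot API):

    from sqlglot.errors import ErrorLevel
    from sqlglot.parser import Parser

    parser = Parser(error_level=ErrorLevel.WARN, max_errors=5)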
    def __init__(
        self,
        error_level: t.Optional[ErrorLevel] = None,
        error_message_context: int = 100,
        max_errors: int = 3,
    ):
        self.error_level = error_level or ErrorLevel.IMMEDIATE
        self.error_message_context = error_message_context
        self.max_errors = max_errors
        self._tokenizer = self.TOKENIZER_CLASS()
        self.reset()
    def parse(
        self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens and returns a list of syntax trees, one tree
        per parsed SQL statement.

        Args:
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The list of the produced syntax trees.
        """
        return self._parse(
            parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
        )
Parses a list of tokens and returns a list of syntax trees, one tree per parsed SQL statement.
Arguments:
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The list of the produced syntax trees.
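A minimal round trip (a sketch; sqlglot.parse and sqlglot.parse_one wrap essentially this flow, using the dialect's own tokenizer):

    from sqlglot.parser import Parser
    from sqlglot.tokens import Tokenizer

    sql = "SELECT a FROM t; SELECT b FROM u"
    trees = Parser().parse(Tokenizer().tokenize(sql), sql)
    print(len(trees))  # 2, one syntax tree per statement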
    def parse_into(
        self,
        expression_types: exp.IntoType,
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens into a given Expression type. If a collection of Expression
        types is given instead, this method will try to parse the token list into each one
        of them, stopping at the first for which the parsing succeeds.

        Args:
            expression_types: The expression type(s) to try and parse the token list into.
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The target Expression.
        """
        errors = []
        for expression_type in ensure_list(expression_types):
            parser = self.EXPRESSION_PARSERS.get(expression_type)
            if not parser:
                raise TypeError(f"No parser registered for {expression_type}")

            try:
                return self._parse(parser, raw_tokens, sql)
            except ParseError as e:
                e.errors[0]["into_expression"] = expression_type
                errors.append(e)

        raise ParseError(
            f"Failed to parse '{sql or raw_tokens}' into {expression_types}",
            errors=merge_errors(errors),
        ) from errors[-1]
Parses a list of tokens into a given Expression type. If a collection of Expression types is given instead, this method will try to parse the token list into each one of them, stopping at the first for which the parsing succeeds.
Arguments:
- expression_types: The expression type(s) to try and parse the token list into.
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The target Expression.
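For instance, parsing a dotted name directly into a table reference (a sketch; the set of registered types lives in EXPRESSION_PARSERS and can vary by version):

    from sqlglot import exp
    from sqlglot.parser import Parser
    from sqlglot.tokens import Tokenizer

    sql = "catalog.db.tbl"
    table = Parser().parse_into(exp.Table, Tokenizer().tokenize(sql), sql)[0]
    print(isinstance(table, exp.Table))  # True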
    def check_errors(self) -> None:
        """Logs or raises any found errors, depending on the chosen error level setting."""
        if self.error_level == ErrorLevel.WARN:
            for error in self.errors:
                logger.error(str(error))
        elif self.error_level == ErrorLevel.RAISE and self.errors:
            raise ParseError(
                concat_messages(self.errors, self.max_errors),
                errors=merge_errors(self.errors),
            )
Logs or raises any found errors, depending on the chosen error level setting.
    def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
        """
        Appends an error in the list of recorded errors or raises it, depending on the chosen
        error level setting.
        """
        token = token or self._curr or self._prev or Token.string("")
        start = token.start
        end = token.end + 1
        start_context = self.sql[max(start - self.error_message_context, 0) : start]
        highlight = self.sql[start:end]
        end_context = self.sql[end : end + self.error_message_context]

        error = ParseError.new(
            f"{message}. Line {token.line}, Col: {token.col}.\n"
            f"  {start_context}\033[4m{highlight}\033[0m{end_context}",
            description=message,
            line=token.line,
            col=token.col,
            start_context=start_context,
            highlight=highlight,
            end_context=end_context,
        )

        if self.error_level == ErrorLevel.IMMEDIATE:
            raise error

        self.errors.append(error)
Appends an error to the list of recorded errors, or raises it, depending on the chosen error level setting.
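In practice (a sketch mirroring the error example in the sqlglot README): with the default ErrorLevel.IMMEDIATE, the first error raises straight out of raise_error, while ErrorLevel.RAISE accumulates up to max_errors messages before check_errors raises them together.

    import sqlglot
    from sqlglot.errors import ParseError

    try:
        sqlglot.transpile("SELECT foo( FROM bar")
    except ParseError as e:
        print(e.errors[0]["description"])  # Expecting )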
    def expression(
        self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs
    ) -> E:
        """
        Creates a new, validated Expression.

        Args:
            exp_class: The expression class to instantiate.
            comments: An optional list of comments to attach to the expression.
            kwargs: The arguments to set for the expression along with their respective values.

        Returns:
            The target expression.
        """
        instance = exp_class(**kwargs)
        instance.add_comments(comments) if comments else self._add_comments(instance)
        return self.validate_expression(instance)
Creates a new, validated Expression.
Arguments:
- exp_class: The expression class to instantiate.
- comments: An optional list of comments to attach to the expression.
- kwargs: The arguments to set for the expression along with their respective values.
Returns:
The target expression.
    def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E:
        """
        Validates an Expression, making sure that all its mandatory arguments are set.

        Args:
            expression: The expression to validate.
            args: An optional list of items that was used to instantiate the expression, if it's a Func.

        Returns:
            The validated expression.
        """
        if self.error_level != ErrorLevel.IGNORE:
            for error_message in expression.error_messages(args):
                self.raise_error(error_message)

        return expression
Validates an Expression, making sure that all its mandatory arguments are set.
Arguments:
- expression: The expression to validate.
- args: An optional list of items that was used to instantiate the expression, if it's a Func.
Returns:
The validated expression.