sqlglot.parser
from __future__ import annotations

import logging
import typing as t
from collections import defaultdict

from sqlglot import exp
from sqlglot.errors import ErrorLevel, ParseError, concat_messages, merge_errors
from sqlglot.helper import apply_index_offset, ensure_list, seq_get
from sqlglot.time import format_time
from sqlglot.tokens import Token, Tokenizer, TokenType
from sqlglot.trie import TrieResult, in_trie, new_trie

if t.TYPE_CHECKING:
    from sqlglot._typing import E

logger = logging.getLogger("sqlglot")


def parse_var_map(args: t.List) -> exp.StarMap | exp.VarMap:
    """Build a VAR_MAP expression from a flat list of alternating keys and values.

    A single star argument (e.g. ``MAP(*)``) produces a ``StarMap`` instead.
    """
    if len(args) == 1 and args[0].is_star:
        return exp.StarMap(this=args[0])

    keys = []
    values = []
    # args is a flat [k1, v1, k2, v2, ...] sequence; split it into parallel arrays.
    for i in range(0, len(args), 2):
        keys.append(args[i])
        values.append(args[i + 1])

    return exp.VarMap(
        keys=exp.Array(expressions=keys),
        values=exp.Array(expressions=values),
    )


def parse_like(args: t.List) -> exp.Escape | exp.Like:
    """Build a LIKE expression; a third argument wraps it in an ESCAPE clause."""
    like = exp.Like(this=seq_get(args, 1), expression=seq_get(args, 0))
    return exp.Escape(this=like, expression=seq_get(args, 2)) if len(args) > 2 else like


def binary_range_parser(
    expr_type: t.Type[exp.Expression],
) -> t.Callable[[Parser, t.Optional[exp.Expression]], t.Optional[exp.Expression]]:
    """Return a parser callback that parses ``this <op> <bitwise expr>`` into
    `expr_type`, allowing a trailing ESCAPE clause (see `Parser._parse_escape`)."""
    return lambda self, this: self._parse_escape(
        self.expression(expr_type, this=this, expression=self._parse_bitwise())
    )


class _Parser(type):
    """Metaclass that precomputes the SHOW/SET keyword tries for each Parser subclass,
    so multi-word keyword lookups don't have to rebuild them at parse time."""

    def __new__(cls, clsname, bases, attrs):
        klass = super().__new__(cls, clsname, bases, attrs)

        klass.SHOW_TRIE = new_trie(key.split(" ") for key in klass.SHOW_PARSERS)
        klass.SET_TRIE = new_trie(key.split(" ") for key in klass.SET_PARSERS)

        return klass


class Parser(metaclass=_Parser):
    """
    Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.

    Args:
        error_level: The desired error level.
            Default: ErrorLevel.IMMEDIATE
        error_message_context: Determines the amount of context to capture from a
            query string when displaying the error message (in number of characters).
            Default: 100
        max_errors: Maximum number of error messages to include in a raised ParseError.
            This is only relevant if error_level is ErrorLevel.RAISE.
            Default: 3
    """

    # Maps SQL function names to callables that build the corresponding Expression
    # from an argument list. Seeded with every function known to `exp`.
    FUNCTIONS: t.Dict[str, t.Callable] = {
        **{name: f.from_arg_list for f in exp.ALL_FUNCTIONS for name in f.sql_names()},
        "DATE_TO_DATE_STR": lambda args: exp.Cast(
            this=seq_get(args, 0),
            to=exp.DataType(this=exp.DataType.Type.TEXT),
        ),
        "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)),
        "LIKE": parse_like,
        "TIME_TO_TIME_STR": lambda args: exp.Cast(
            this=seq_get(args, 0),
            to=exp.DataType(this=exp.DataType.Type.TEXT),
        ),
        "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring(
            this=exp.Cast(
                this=seq_get(args, 0),
                to=exp.DataType(this=exp.DataType.Type.TEXT),
            ),
            start=exp.Literal.number(1),
            length=exp.Literal.number(10),
        ),
        "VAR_MAP": parse_var_map,
    }

    # Functions that may appear without parentheses, e.g. CURRENT_DATE.
    NO_PAREN_FUNCTIONS = {
        TokenType.CURRENT_DATE: exp.CurrentDate,
        # NOTE(review): CURRENT_DATETIME maps to CurrentDate (not a CurrentDatetime
        # expression) — confirm this is intentional.
        TokenType.CURRENT_DATETIME: exp.CurrentDate,
        TokenType.CURRENT_TIME: exp.CurrentTime,
        TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp,
        TokenType.CURRENT_USER: exp.CurrentUser,
    }

    NESTED_TYPE_TOKENS = {
        TokenType.ARRAY,
        TokenType.MAP,
        TokenType.NULLABLE,
        TokenType.STRUCT,
    }

    ENUM_TYPE_TOKENS = {
        TokenType.ENUM,
    }

    TYPE_TOKENS = {
        TokenType.BIT,
        TokenType.BOOLEAN,
        TokenType.TINYINT,
        TokenType.UTINYINT,
        TokenType.SMALLINT,
        TokenType.USMALLINT,
        TokenType.INT,
        TokenType.UINT,
        TokenType.BIGINT,
        TokenType.UBIGINT,
        TokenType.INT128,
        TokenType.UINT128,
        TokenType.INT256,
        TokenType.UINT256,
        TokenType.FLOAT,
        TokenType.DOUBLE,
        TokenType.CHAR,
        TokenType.NCHAR,
        TokenType.VARCHAR,
        TokenType.NVARCHAR,
        TokenType.TEXT,
        TokenType.MEDIUMTEXT,
        TokenType.LONGTEXT,
        TokenType.MEDIUMBLOB,
        TokenType.LONGBLOB,
        TokenType.BINARY,
        TokenType.VARBINARY,
        TokenType.JSON,
        TokenType.JSONB,
        TokenType.INTERVAL,
        TokenType.TIME,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
        TokenType.DATETIME,
        TokenType.DATETIME64,
        TokenType.DATE,
        TokenType.INT4RANGE,
        TokenType.INT4MULTIRANGE,
        TokenType.INT8RANGE,
        TokenType.INT8MULTIRANGE,
        TokenType.NUMRANGE,
        TokenType.NUMMULTIRANGE,
        TokenType.TSRANGE,
        TokenType.TSMULTIRANGE,
        TokenType.TSTZRANGE,
        TokenType.TSTZMULTIRANGE,
        TokenType.DATERANGE,
        TokenType.DATEMULTIRANGE,
        TokenType.DECIMAL,
        TokenType.BIGDECIMAL,
        TokenType.UUID,
        TokenType.GEOGRAPHY,
        TokenType.GEOMETRY,
        TokenType.HLLSKETCH,
        TokenType.HSTORE,
        TokenType.PSEUDO_TYPE,
        TokenType.SUPER,
        TokenType.SERIAL,
        TokenType.SMALLSERIAL,
        TokenType.BIGSERIAL,
        TokenType.XML,
        TokenType.UNIQUEIDENTIFIER,
        TokenType.USERDEFINED,
        TokenType.MONEY,
        TokenType.SMALLMONEY,
        TokenType.ROWVERSION,
        TokenType.IMAGE,
        TokenType.VARIANT,
        TokenType.OBJECT,
        TokenType.INET,
        TokenType.IPADDRESS,
        TokenType.IPPREFIX,
        TokenType.ENUM,
        *NESTED_TYPE_TOKENS,
    }

    SUBQUERY_PREDICATES = {
        TokenType.ANY: exp.Any,
        TokenType.ALL: exp.All,
        TokenType.EXISTS: exp.Exists,
        TokenType.SOME: exp.Any,
    }

    RESERVED_KEYWORDS = {
        *Tokenizer.SINGLE_TOKENS.values(),
        TokenType.SELECT,
    }

    DB_CREATABLES = {
        TokenType.DATABASE,
        TokenType.SCHEMA,
        TokenType.TABLE,
        TokenType.VIEW,
        TokenType.DICTIONARY,
    }

    CREATABLES = {
        TokenType.COLUMN,
        TokenType.FUNCTION,
        TokenType.INDEX,
        TokenType.PROCEDURE,
        *DB_CREATABLES,
    }

    # Tokens that can represent identifiers
    ID_VAR_TOKENS = {
        TokenType.VAR,
        TokenType.ANTI,
        TokenType.APPLY,
        TokenType.ASC,
        TokenType.AUTO_INCREMENT,
        TokenType.BEGIN,
        TokenType.CACHE,
        TokenType.CASE,
        TokenType.COLLATE,
        TokenType.COMMAND,
        TokenType.COMMENT,
        TokenType.COMMIT,
        TokenType.CONSTRAINT,
        TokenType.DEFAULT,
        TokenType.DELETE,
        TokenType.DESC,
        TokenType.DESCRIBE,
        TokenType.DICTIONARY,
        TokenType.DIV,
        TokenType.END,
        TokenType.EXECUTE,
        TokenType.ESCAPE,
        TokenType.FALSE,
        TokenType.FIRST,
        TokenType.FILTER,
        TokenType.FORMAT,
        TokenType.FULL,
        TokenType.IF,
        TokenType.IS,
        TokenType.ISNULL,
        TokenType.INTERVAL,
        TokenType.KEEP,
        TokenType.LEFT,
        TokenType.LOAD,
        TokenType.MERGE,
        TokenType.NATURAL,
        TokenType.NEXT,
        TokenType.OFFSET,
        TokenType.ORDINALITY,
        TokenType.OVERWRITE,
        TokenType.PARTITION,
        TokenType.PERCENT,
        TokenType.PIVOT,
        TokenType.PRAGMA,
        TokenType.RANGE,
        TokenType.REFERENCES,
        TokenType.RIGHT,
        TokenType.ROW,
        TokenType.ROWS,
        TokenType.SEMI,
        TokenType.SET,
        TokenType.SETTINGS,
        TokenType.SHOW,
        TokenType.TEMPORARY,
        TokenType.TOP,
        TokenType.TRUE,
        TokenType.UNIQUE,
        TokenType.UNPIVOT,
        TokenType.UPDATE,
        TokenType.VOLATILE,
        TokenType.WINDOW,
        *CREATABLES,
        *SUBQUERY_PREDICATES,
        *TYPE_TOKENS,
        *NO_PAREN_FUNCTIONS,
    }

    INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END}

    # Identifier tokens minus keywords that would be ambiguous after a table name.
    TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - {
        TokenType.APPLY,
        TokenType.ASOF,
        TokenType.FULL,
        TokenType.LEFT,
        TokenType.LOCK,
        TokenType.NATURAL,
        TokenType.OFFSET,
        TokenType.RIGHT,
        TokenType.WINDOW,
    }

    COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS}

    UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET}

    TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"}

    # Tokens that may start a function call.
    FUNC_TOKENS = {
        TokenType.COMMAND,
        TokenType.CURRENT_DATE,
        TokenType.CURRENT_DATETIME,
        TokenType.CURRENT_TIMESTAMP,
        TokenType.CURRENT_TIME,
        TokenType.CURRENT_USER,
        TokenType.FILTER,
        TokenType.FIRST,
        TokenType.FORMAT,
        TokenType.GLOB,
        TokenType.IDENTIFIER,
        TokenType.INDEX,
        TokenType.ISNULL,
        TokenType.ILIKE,
        TokenType.LIKE,
        TokenType.MERGE,
        TokenType.OFFSET,
        TokenType.PRIMARY_KEY,
        TokenType.RANGE,
        TokenType.REPLACE,
        TokenType.RLIKE,
        TokenType.ROW,
        TokenType.UNNEST,
        TokenType.VAR,
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.DATE,
        TokenType.DATETIME,
        TokenType.TABLE,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.WINDOW,
        TokenType.XOR,
        *TYPE_TOKENS,
        *SUBQUERY_PREDICATES,
    }

    # Binary-operator token -> Expression class tables, ordered by precedence tier.
    CONJUNCTION = {
        TokenType.AND: exp.And,
        TokenType.OR: exp.Or,
    }

    EQUALITY = {
        TokenType.EQ: exp.EQ,
        TokenType.NEQ: exp.NEQ,
        TokenType.NULLSAFE_EQ: exp.NullSafeEQ,
    }

    COMPARISON = {
        TokenType.GT: exp.GT,
        TokenType.GTE: exp.GTE,
        TokenType.LT: exp.LT,
        TokenType.LTE: exp.LTE,
    }

    BITWISE = {
        TokenType.AMP: exp.BitwiseAnd,
        TokenType.CARET: exp.BitwiseXor,
        TokenType.PIPE: exp.BitwiseOr,
        TokenType.DPIPE: exp.DPipe,
    }

    TERM = {
        TokenType.DASH: exp.Sub,
        TokenType.PLUS: exp.Add,
        TokenType.MOD: exp.Mod,
        TokenType.COLLATE: exp.Collate,
    }

    FACTOR = {
        TokenType.DIV: exp.IntDiv,
        TokenType.LR_ARROW: exp.Distance,
        TokenType.SLASH: exp.Div,
        TokenType.STAR: exp.Mul,
    }

    TIMESTAMPS = {
        TokenType.TIME,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
    }

    SET_OPERATIONS = {
        TokenType.UNION,
        TokenType.INTERSECT,
        TokenType.EXCEPT,
    }

    JOIN_METHODS = {
        TokenType.NATURAL,
        TokenType.ASOF,
    }

    JOIN_SIDES = {
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.FULL,
    }

    JOIN_KINDS = {
        TokenType.INNER,
        TokenType.OUTER,
        TokenType.CROSS,
        TokenType.SEMI,
        TokenType.ANTI,
    }

    JOIN_HINTS: t.Set[str] = set()

    # Lambda-literal parsers, keyed by the arrow token that introduces the body.
    LAMBDAS = {
        TokenType.ARROW: lambda self, expressions: self.expression(
            exp.Lambda,
            this=self._replace_lambda(
                self._parse_conjunction(),
                {node.name for node in expressions},
            ),
            expressions=expressions,
        ),
        TokenType.FARROW: lambda self, expressions: self.expression(
            exp.Kwarg,
            this=exp.var(expressions[0].name),
            expression=self._parse_conjunction(),
        ),
    }

    # Postfix operators applied to a column; DOT is handled specially (None).
    COLUMN_OPERATORS = {
        TokenType.DOT: None,
        TokenType.DCOLON: lambda self, this, to: self.expression(
            exp.Cast if self.STRICT_CAST else exp.TryCast,
            this=this,
            to=to,
        ),
        TokenType.ARROW: lambda self, this, path: self.expression(
            exp.JSONExtract,
            this=this,
            expression=path,
        ),
        TokenType.DARROW: lambda self, this, path: self.expression(
            exp.JSONExtractScalar,
            this=this,
            expression=path,
        ),
        TokenType.HASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtract,
            this=this,
            expression=path,
        ),
        TokenType.DHASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtractScalar,
            this=this,
            expression=path,
        ),
        TokenType.PLACEHOLDER: lambda self, this, key: self.expression(
            exp.JSONBContains,
            this=this,
            expression=key,
        ),
    }

    # Entry points used by `parse_into`: Expression type -> parse method.
    EXPRESSION_PARSERS = {
        exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
        exp.Column: lambda self: self._parse_column(),
        exp.Condition: lambda self: self._parse_conjunction(),
        exp.DataType: lambda self: self._parse_types(),
        exp.Expression: lambda self: self._parse_statement(),
        exp.From: lambda self: self._parse_from(),
        exp.Group: lambda self: self._parse_group(),
        exp.Having: lambda self: self._parse_having(),
        exp.Identifier: lambda self: self._parse_id_var(),
        exp.Join: lambda self: self._parse_join(),
        exp.Lambda: lambda self: self._parse_lambda(),
        exp.Lateral: lambda self: self._parse_lateral(),
        exp.Limit: lambda self: self._parse_limit(),
        exp.Offset: lambda self: self._parse_offset(),
        exp.Order: lambda self: self._parse_order(),
        exp.Ordered: lambda self: self._parse_ordered(),
        exp.Properties: lambda self: self._parse_properties(),
        exp.Qualify: lambda self: self._parse_qualify(),
        exp.Returning: lambda self: self._parse_returning(),
        exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY),
        exp.Table: lambda self: self._parse_table_parts(),
        exp.TableAlias: lambda self: self._parse_table_alias(),
        exp.Where: lambda self: self._parse_where(),
        exp.Window: lambda self: self._parse_named_window(),
        exp.With: lambda self: self._parse_with(),
        "JOIN_TYPE": lambda self: self._parse_join_parts(),
    }

    # Statement dispatch: the leading token of a statement selects its parser.
    STATEMENT_PARSERS = {
        TokenType.ALTER: lambda self: self._parse_alter(),
        TokenType.BEGIN: lambda self: self._parse_transaction(),
        TokenType.CACHE: lambda self: self._parse_cache(),
        TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(),
        TokenType.COMMENT: lambda self: self._parse_comment(),
        TokenType.CREATE: lambda self: self._parse_create(),
        TokenType.DELETE: lambda self: self._parse_delete(),
        TokenType.DESC: lambda self: self._parse_describe(),
        TokenType.DESCRIBE: lambda self: self._parse_describe(),
        TokenType.DROP: lambda self: self._parse_drop(),
        TokenType.FROM: lambda self: exp.select("*").from_(
            t.cast(exp.From, self._parse_from(skip_from_token=True))
        ),
        TokenType.INSERT: lambda self: self._parse_insert(),
        TokenType.LOAD: lambda self: self._parse_load(),
        TokenType.MERGE: lambda self: self._parse_merge(),
        TokenType.PIVOT: lambda self: self._parse_simplified_pivot(),
        TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()),
        TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(),
        TokenType.SET: lambda self: self._parse_set(),
        TokenType.UNCACHE: lambda self: self._parse_uncache(),
        TokenType.UPDATE: lambda self: self._parse_update(),
        TokenType.USE: lambda self: self.expression(
            exp.Use,
            kind=self._match_texts(("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA"))
            and exp.var(self._prev.text),
            this=self._parse_table(schema=False),
        ),
    }

    UNARY_PARSERS = {
        TokenType.PLUS: lambda self: self._parse_unary(),  # Unary + is handled as a no-op
        TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()),
        TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()),
        TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()),
    }

    # Literal/terminal parsers; each receives the already-consumed token.
    PRIMARY_PARSERS = {
        TokenType.STRING: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=True
        ),
        TokenType.NUMBER: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=False
        ),
        TokenType.STAR: lambda self, _: self.expression(
            exp.Star, **{"except": self._parse_except(), "replace": self._parse_replace()}
        ),
        TokenType.NULL: lambda self, _: self.expression(exp.Null),
        TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True),
        TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False),
        TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text),
        TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text),
        TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text),
        TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token),
        TokenType.NATIONAL_STRING: lambda self, token: self.expression(
            exp.National, this=token.text
        ),
        TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text),
        TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(),
    }

    PLACEHOLDER_PARSERS = {
        TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder),
        TokenType.PARAMETER: lambda self: self._parse_parameter(),
        TokenType.COLON: lambda self: self.expression(exp.Placeholder, this=self._prev.text)
        if self._match_set((TokenType.NUMBER, TokenType.VAR))
        else None,
    }

    # Range/predicate operators (BETWEEN, IN, LIKE, ...) applied to a parsed lhs.
    RANGE_PARSERS = {
        TokenType.BETWEEN: lambda self, this: self._parse_between(this),
        TokenType.GLOB: binary_range_parser(exp.Glob),
        TokenType.ILIKE: binary_range_parser(exp.ILike),
        TokenType.IN: lambda self, this: self._parse_in(this),
        TokenType.IRLIKE: binary_range_parser(exp.RegexpILike),
        TokenType.IS: lambda self, this: self._parse_is(this),
        TokenType.LIKE: binary_range_parser(exp.Like),
        TokenType.OVERLAPS: binary_range_parser(exp.Overlaps),
        TokenType.RLIKE: binary_range_parser(exp.RegexpLike),
        TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo),
    }

    # DDL property parsers, keyed by (possibly multi-word) keyword text.
    PROPERTY_PARSERS: t.Dict[str, t.Callable] = {
        "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty),
        "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty),
        "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(),
        "CHARACTER SET": lambda self: self._parse_character_set(),
        "CHECKSUM": lambda self: self._parse_checksum(),
        "CLUSTER BY": lambda self: self._parse_cluster(),
        "CLUSTERED": lambda self: self._parse_clustered_by(),
        "COLLATE": lambda self: self._parse_property_assignment(exp.CollateProperty),
        "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty),
        "COPY": lambda self: self._parse_copy_property(),
        "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs),
        "DEFINER": lambda self: self._parse_definer(),
        "DETERMINISTIC": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "DISTKEY": lambda self: self._parse_distkey(),
        "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty),
        "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty),
        "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty),
        "EXTERNAL": lambda self: self.expression(exp.ExternalProperty),
        "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs),
        "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "FREESPACE": lambda self: self._parse_freespace(),
        "HEAP": lambda self: self.expression(exp.HeapProperty),
        "IMMUTABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs),
        "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty),
        "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"),
        "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"),
        "LIKE": lambda self: self._parse_create_like(),
        "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty),
        "LOCK": lambda self: self._parse_locking(),
        "LOCKING": lambda self: self._parse_locking(),
        "LOG": lambda self, **kwargs: self._parse_log(**kwargs),
        "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty),
        "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs),
        "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True),
        "NO": lambda self: self._parse_no_property(),
        "ON": lambda self: self._parse_on_property(),
        "ORDER BY": lambda self: self._parse_order(skip_order_token=True),
        "PARTITION BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED_BY": lambda self: self._parse_partitioned_by(),
        "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True),
        "RANGE": lambda self: self._parse_dict_range(this="RANGE"),
        "RETURNS": lambda self: self._parse_returns(),
        "ROW": lambda self: self._parse_row(),
        "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty),
        "SET": lambda self: self.expression(exp.SetProperty, multi=False),
        "SETTINGS": lambda self: self.expression(
            exp.SettingsProperty, expressions=self._parse_csv(self._parse_set_item)
        ),
        "SORTKEY": lambda self: self._parse_sortkey(),
        "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"),
        "STABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("STABLE")
        ),
        "STORED": lambda self: self._parse_stored(),
        "TBLPROPERTIES": lambda self: self._parse_wrapped_csv(self._parse_property),
        "TEMP": lambda self: self.expression(exp.TemporaryProperty),
        "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty),
        "TO": lambda self: self._parse_to_table(),
        "TRANSIENT": lambda self: self.expression(exp.TransientProperty),
        "TTL": lambda self: self._parse_ttl(),
        "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "VOLATILE": lambda self: self._parse_volatile_property(),
        "WITH": lambda self: self._parse_with_property(),
    }

    # Column-constraint parsers, keyed by constraint keyword text.
    CONSTRAINT_PARSERS = {
        "AUTOINCREMENT": lambda self: self._parse_auto_increment(),
        "AUTO_INCREMENT": lambda self: self._parse_auto_increment(),
        "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False),
        "CHARACTER SET": lambda self: self.expression(
            exp.CharacterSetColumnConstraint, this=self._parse_var_or_string()
        ),
        "CHECK": lambda self: self.expression(
            exp.CheckColumnConstraint, this=self._parse_wrapped(self._parse_conjunction)
        ),
        "COLLATE": lambda self: self.expression(
            exp.CollateColumnConstraint, this=self._parse_var()
        ),
        "COMMENT": lambda self: self.expression(
            exp.CommentColumnConstraint, this=self._parse_string()
        ),
        "COMPRESS": lambda self: self._parse_compress(),
        "DEFAULT": lambda self: self.expression(
            exp.DefaultColumnConstraint, this=self._parse_bitwise()
        ),
        "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()),
        "FOREIGN KEY": lambda self: self._parse_foreign_key(),
        "FORMAT": lambda self: self.expression(
            exp.DateFormatColumnConstraint, this=self._parse_var_or_string()
        ),
        "GENERATED": lambda self: self._parse_generated_as_identity(),
        "IDENTITY": lambda self: self._parse_auto_increment(),
        "INLINE": lambda self: self._parse_inline(),
        "LIKE": lambda self: self._parse_create_like(),
        "NOT": lambda self: self._parse_not_constraint(),
        "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True),
        "ON": lambda self: self._match(TokenType.UPDATE)
        and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function()),
        "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()),
        "PRIMARY KEY": lambda self: self._parse_primary_key(),
        "REFERENCES": lambda self: self._parse_references(match=False),
        "TITLE": lambda self: self.expression(
            exp.TitleColumnConstraint, this=self._parse_var_or_string()
        ),
        "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]),
        "UNIQUE": lambda self: self._parse_unique(),
        "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint),
    }

    # ALTER TABLE action parsers.
    ALTER_PARSERS = {
        "ADD": lambda self: self._parse_alter_table_add(),
        "ALTER": lambda self: self._parse_alter_table_alter(),
        "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()),
        "DROP": lambda self: self._parse_alter_table_drop(),
        "RENAME": lambda self: self._parse_alter_table_rename(),
    }

    SCHEMA_UNNAMED_CONSTRAINTS = {"CHECK", "FOREIGN KEY", "LIKE", "PRIMARY KEY", "UNIQUE"}

    # Function-like constructs that don't use parentheses (CASE, IF, ...).
    NO_PAREN_FUNCTION_PARSERS = {
        TokenType.ANY: lambda self: self.expression(exp.Any, this=self._parse_bitwise()),
        TokenType.CASE: lambda self: self._parse_case(),
        TokenType.IF: lambda self: self._parse_if(),
        TokenType.NEXT_VALUE_FOR: lambda self: self.expression(
            exp.NextValueFor,
            this=self._parse_column(),
            order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order),
        ),
    }

    FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"}

    # Functions whose argument lists need bespoke parsing (special syntax).
    FUNCTION_PARSERS = {
        "ANY_VALUE": lambda self: self._parse_any_value(),
        "CAST": lambda self: self._parse_cast(self.STRICT_CAST),
        "CONCAT": lambda self: self._parse_concat(),
        "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST),
        "DECODE": lambda self: self._parse_decode(),
        "EXTRACT": lambda self: self._parse_extract(),
        "JSON_OBJECT": lambda self: self._parse_json_object(),
        "LOG": lambda self: self._parse_logarithm(),
        "MATCH": lambda self: self._parse_match_against(),
        "OPENJSON": lambda self: self._parse_open_json(),
        "POSITION": lambda self: self._parse_position(),
        "SAFE_CAST": lambda self: self._parse_cast(False),
        "STRING_AGG": lambda self: self._parse_string_agg(),
        "SUBSTRING": lambda self: self._parse_substring(),
        "TRIM": lambda self: self._parse_trim(),
        "TRY_CAST": lambda self: self._parse_cast(False),
        "TRY_CONVERT": lambda self: self._parse_convert(False),
    }

    # Query-modifier parsers; each returns an (arg name, parsed node) pair.
    QUERY_MODIFIER_PARSERS = {
        TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()),
        TokenType.WHERE: lambda self: ("where", self._parse_where()),
        TokenType.GROUP_BY: lambda self: ("group", self._parse_group()),
        TokenType.HAVING: lambda self: ("having", self._parse_having()),
        TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()),
        TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()),
        TokenType.ORDER_BY: lambda self: ("order", self._parse_order()),
        TokenType.LIMIT: lambda self: ("limit", self._parse_limit()),
        TokenType.FETCH: lambda self: ("limit", self._parse_limit()),
        TokenType.OFFSET: lambda self: ("offset", self._parse_offset()),
        TokenType.FOR: lambda self: ("locks", self._parse_locks()),
        TokenType.LOCK: lambda self: ("locks", self._parse_locks()),
        TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)),
        TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)),
        TokenType.CLUSTER_BY: lambda self: (
            "cluster",
            self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
        ),
        TokenType.DISTRIBUTE_BY: lambda self: (
            "distribute",
            self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY),
        ),
        TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)),
    }

    SET_PARSERS = {
        "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"),
        "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"),
        "SESSION": lambda self: self._parse_set_item_assignment("SESSION"),
        "TRANSACTION": lambda self: self._parse_set_transaction(),
    }

    SHOW_PARSERS: t.Dict[str, t.Callable] = {}

    TYPE_LITERAL_PARSERS: t.Dict[exp.DataType.Type, t.Callable] = {}

    MODIFIABLES = (exp.Subquery, exp.Subqueryable, exp.Table)

    DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN}

    PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE}

    TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"}
    TRANSACTION_CHARACTERISTICS = {
        "ISOLATION LEVEL REPEATABLE READ",
        "ISOLATION LEVEL READ COMMITTED",
        "ISOLATION LEVEL READ UNCOMMITTED",
        "ISOLATION LEVEL SERIALIZABLE",
        "READ WRITE",
        "READ ONLY",
    }

    INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"}

    CLONE_KINDS = {"TIMESTAMP", "OFFSET", "STATEMENT"}

    TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE}

    WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS}
    WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER}
    WINDOW_SIDES = {"FOLLOWING", "PRECEDING"}

    ADD_CONSTRAINT_TOKENS = {TokenType.CONSTRAINT, TokenType.PRIMARY_KEY, TokenType.FOREIGN_KEY}

    # Dialect toggles; subclasses override these.
    STRICT_CAST = True

    # A NULL arg in CONCAT yields NULL by default
    CONCAT_NULL_OUTPUTS_STRING = False

    PREFIXED_PIVOT_COLUMNS = False
    IDENTIFY_PIVOT_STRINGS = False

    LOG_BASE_FIRST = True
    LOG_DEFAULTS_TO_LN = False

    __slots__ = (
        "error_level",
        "error_message_context",
        "max_errors",
        "sql",
        "errors",
        "_tokens",
        "_index",
        "_curr",
        "_next",
        "_prev",
        "_prev_comments",
    )

    # Autofilled
    INDEX_OFFSET: int = 0
    UNNEST_COLUMN_ONLY: bool = False
    ALIAS_POST_TABLESAMPLE: bool = False
    STRICT_STRING_CONCAT = False
    NULL_ORDERING: str = "nulls_are_small"
    SHOW_TRIE: t.Dict = {}
    SET_TRIE: t.Dict = {}
    FORMAT_MAPPING: t.Dict[str, str] = {}
    FORMAT_TRIE: t.Dict = {}
    TIME_MAPPING: t.Dict[str, str] = {}
    TIME_TRIE: t.Dict = {}

    def __init__(
        self,
        error_level: t.Optional[ErrorLevel] = None,
        error_message_context: int = 100,
        max_errors: int = 3,
    ):
        self.error_level = error_level or ErrorLevel.IMMEDIATE
        self.error_message_context = error_message_context
        self.max_errors = max_errors
        self.reset()

    def reset(self):
        """Clears all parse state so the instance can be reused for a new token stream."""
        self.sql = ""
        self.errors = []
        self._tokens = []
        self._index = 0
        self._curr = None
        self._next = None
        self._prev = None
        self._prev_comments = None

    def parse(
        self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens and returns a list of syntax trees, one tree
        per parsed SQL statement.

        Args:
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The list of the produced syntax trees.
        """
        return self._parse(
            parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
        )

    def parse_into(
        self,
        expression_types: exp.IntoType,
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens into a given Expression type. If a collection of Expression
        types is given instead, this method will try to parse the token list into each one
        of them, stopping at the first for which the parsing succeeds.

        Args:
            expression_types: The expression type(s) to try and parse the token list into.
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The target Expression.
        """
        errors = []
        for expression_type in ensure_list(expression_types):
            parser = self.EXPRESSION_PARSERS.get(expression_type)
            if not parser:
                raise TypeError(f"No parser registered for {expression_type}")

            try:
                return self._parse(parser, raw_tokens, sql)
            except ParseError as e:
                e.errors[0]["into_expression"] = expression_type
                errors.append(e)

        raise ParseError(
            f"Failed to parse '{sql or raw_tokens}' into {expression_types}",
            errors=merge_errors(errors),
        ) from errors[-1]

    def _parse(
        self,
        parse_method: t.Callable[[Parser], t.Optional[exp.Expression]],
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        """Splits the token stream on semicolons and applies `parse_method` to each chunk."""
        self.reset()
        self.sql = sql or ""

        total = len(raw_tokens)
        chunks: t.List[t.List[Token]] = [[]]

        for i, token in enumerate(raw_tokens):
            if token.token_type == TokenType.SEMICOLON:
                # A trailing semicolon doesn't start a new (empty) statement.
                if i < total - 1:
                    chunks.append([])
            else:
                chunks[-1].append(token)

        expressions = []

        for tokens in chunks:
            self._index = -1
            self._tokens = tokens
            self._advance()

            expressions.append(parse_method(self))

            # Any tokens left unconsumed mean the statement wasn't fully parsed.
            if self._index < len(self._tokens):
                self.raise_error("Invalid expression / Unexpected token")

            self.check_errors()

        return expressions

    def check_errors(self) -> None:
        """Logs or raises any found errors, depending on the chosen error level setting."""
        if self.error_level == ErrorLevel.WARN:
            for error in self.errors:
                logger.error(str(error))
        elif self.error_level == ErrorLevel.RAISE and self.errors:
            raise ParseError(
                concat_messages(self.errors, self.max_errors),
                errors=merge_errors(self.errors),
            )

    def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
        """
        Appends an error in the list of recorded errors or raises it, depending on the chosen
        error level setting.
        """
        token = token or self._curr or self._prev or Token.string("")
        start = token.start
        end = token.end + 1
        start_context = self.sql[max(start - self.error_message_context, 0) : start]
        highlight = self.sql[start:end]
        end_context = self.sql[end : end + self.error_message_context]

        error = ParseError.new(
            f"{message}. Line {token.line}, Col: {token.col}.\n"
            f"  {start_context}\033[4m{highlight}\033[0m{end_context}",
            description=message,
            line=token.line,
            col=token.col,
            start_context=start_context,
            highlight=highlight,
            end_context=end_context,
        )

        if self.error_level == ErrorLevel.IMMEDIATE:
            raise error

        self.errors.append(error)

    def expression(
        self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs
    ) -> E:
        """
        Creates a new, validated Expression.

        Args:
            exp_class: The expression class to instantiate.
            comments: An optional list of comments to attach to the expression.
            kwargs: The arguments to set for the expression along with their respective values.

        Returns:
            The target expression.
        """
        instance = exp_class(**kwargs)
        # Explicit comments win; otherwise attach any comments pending from the last token.
        instance.add_comments(comments) if comments else self._add_comments(instance)
        return self.validate_expression(instance)

    def _add_comments(self, expression: t.Optional[exp.Expression]) -> None:
        """Attaches pending token comments to `expression` and clears the pending buffer."""
        if expression and self._prev_comments:
            expression.add_comments(self._prev_comments)
            self._prev_comments = None

    def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E:
        """
        Validates an Expression, making sure that all its mandatory arguments are set.

        Args:
            expression: The expression to validate.
            args: An optional list of items that was used to instantiate the expression, if it's a Func.

        Returns:
            The validated expression.
        """
        if self.error_level != ErrorLevel.IGNORE:
            for error_message in expression.error_messages(args):
                self.raise_error(error_message)

        return expression

    def _find_sql(self, start: Token, end: Token) -> str:
        """Returns the slice of the original SQL spanned by the two tokens, inclusive."""
        return self.sql[start.start : end.end + 1]

    def _advance(self, times: int = 1) -> None:
        """Moves the token cursor forward `times` positions, refreshing the
        _curr/_next/_prev/_prev_comments views of the stream."""
        self._index += times
        self._curr = seq_get(self._tokens, self._index)
        self._next = seq_get(self._tokens, self._index + 1)

        if self._index > 0:
            self._prev = self._tokens[self._index - 1]
            self._prev_comments = self._prev.comments
        else:
            self._prev = None
            self._prev_comments = None

    def _retreat(self, index: int) -> None:
        """Moves the token cursor back (or forward) to an absolute `index`."""
        if index != self._index:
            self._advance(index - self._index)

    def _parse_command(self) -> exp.Command:
        """Falls back to wrapping the previous token and the rest as an opaque Command."""
        return self.expression(exp.Command, this=self._prev.text, expression=self._parse_string())

    def _parse_comment(self, allow_exists: bool = True) -> exp.Expression:
        """Parses a COMMENT ON <kind> <target> IS <string> statement; anything with an
        unrecognized target kind degrades to an opaque Command."""
        start = self._prev
        exists = self._parse_exists() if allow_exists else None

        self._match(TokenType.ON)

        kind = self._match_set(self.CREATABLES) and self._prev
        if not kind:
            return self._parse_as_command(start)

        if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=kind.token_type)
        elif kind.token_type == TokenType.TABLE:
            this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS)
        elif kind.token_type == TokenType.COLUMN:
            this = self._parse_column()
        else:
            this = self._parse_id_var()

        self._match(TokenType.IS)

        return self.expression(
            exp.Comment, this=this, kind=kind.text, expression=self._parse_string(), exists=exists
        )

    def _parse_to_table(
        self,
    ) -> exp.ToTableProperty:
        table = self._parse_table_parts(schema=True)
        return self.expression(exp.ToTableProperty,
                               this=table)

    # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl
    def _parse_ttl(self) -> exp.Expression:
        def _parse_ttl_action() -> t.Optional[exp.Expression]:
            # A TTL entry is an expression optionally followed by an action:
            # DELETE, RECOMPRESS <expr>, TO DISK '<name>' or TO VOLUME '<name>'.
            this = self._parse_bitwise()

            if self._match_text_seq("DELETE"):
                return self.expression(exp.MergeTreeTTLAction, this=this, delete=True)
            if self._match_text_seq("RECOMPRESS"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise()
                )
            if self._match_text_seq("TO", "DISK"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string()
                )
            if self._match_text_seq("TO", "VOLUME"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string()
                )

            return this

        expressions = self._parse_csv(_parse_ttl_action)
        where = self._parse_where()
        group = self._parse_group()

        aggregates = None
        if group and self._match(TokenType.SET):
            aggregates = self._parse_csv(self._parse_set_item)

        return self.expression(
            exp.MergeTreeTTL,
            expressions=expressions,
            where=where,
            group=group,
            aggregates=aggregates,
        )

    def _parse_statement(self) -> t.Optional[exp.Expression]:
        # Dispatch order: registered statement parsers, raw tokenizer commands,
        # then a plain expression / SELECT with query modifiers.
        if self._curr is None:
            return None

        if self._match_set(self.STATEMENT_PARSERS):
            return self.STATEMENT_PARSERS[self._prev.token_type](self)

        if self._match_set(Tokenizer.COMMANDS):
            return self._parse_command()

        expression = self._parse_expression()
        expression = self._parse_set_operations(expression) if expression else self._parse_select()
        return self._parse_query_modifiers(expression)

    def _parse_drop(self) -> exp.Drop | exp.Command:
        start = self._prev
        temporary = self._match(TokenType.TEMPORARY)
        materialized = self._match_text_seq("MATERIALIZED")

        kind = self._match_set(self.CREATABLES) and self._prev.text
        if not kind:
            # Not a recognized creatable kind: keep the statement as a raw command.
            return self._parse_as_command(start)

        return self.expression(
            exp.Drop,
            comments=start.comments,
            exists=self._parse_exists(),
            this=self._parse_table(schema=True),
            kind=kind,
            temporary=temporary,
            materialized=materialized,
            cascade=self._match_text_seq("CASCADE"),
            constraints=self._match_text_seq("CONSTRAINTS"),
            purge=self._match_text_seq("PURGE"),
        )

    def _parse_exists(self, not_: bool = False) -> t.Optional[bool]:
        # Matches IF [NOT] EXISTS; truthy only if the full sequence matched.
        return (
            self._match(TokenType.IF)
            and (not not_ or self._match(TokenType.NOT))
            and self._match(TokenType.EXISTS)
        )

    def _parse_create(self) -> exp.Create | exp.Command:
        # Note: this can't be None because we've matched a statement parser
        start = self._prev
        replace = start.text.upper() == "REPLACE" or self._match_pair(
            TokenType.OR, TokenType.REPLACE
        )
        unique = self._match(TokenType.UNIQUE)

        # CREATE TABLE FUNCTION: skip the TABLE token so FUNCTION is matched below.
        if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False):
            self._advance()

        properties = None
        create_token = self._match_set(self.CREATABLES) and self._prev

        if not create_token:
            # exp.Properties.Location.POST_CREATE
            properties = self._parse_properties()
            create_token = self._match_set(self.CREATABLES) and self._prev

            if not properties or not create_token:
                return self._parse_as_command(start)

        exists = self._parse_exists(not_=True)
        this = None
        expression: t.Optional[exp.Expression] = None
        indexes = None
        no_schema_binding = None
        begin = None
        clone = None

        def extend_props(temp_props: t.Optional[exp.Properties]) -> None:
            # Merges newly parsed properties into the running `properties` container.
            nonlocal properties
            if properties and temp_props:
                properties.expressions.extend(temp_props.expressions)
            elif temp_props:
                properties = temp_props

        if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=create_token.token_type)

            # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature)
            extend_props(self._parse_properties())

            self._match(TokenType.ALIAS)

            if self._match(TokenType.COMMAND):
                expression = self._parse_as_command(self._prev)
            else:
                begin = self._match(TokenType.BEGIN)
                return_ = self._match_text_seq("RETURN")
                expression = self._parse_statement()

                if return_:
                    expression = self.expression(exp.Return, this=expression)
        elif create_token.token_type == TokenType.INDEX:
            this = self._parse_index(index=self._parse_id_var())
        elif create_token.token_type in self.DB_CREATABLES:
            table_parts = self._parse_table_parts(schema=True)

            # exp.Properties.Location.POST_NAME
            self._match(TokenType.COMMA)
            extend_props(self._parse_properties(before=True))

            this = self._parse_schema(this=table_parts)

            # exp.Properties.Location.POST_SCHEMA and POST_WITH
            extend_props(self._parse_properties())

            self._match(TokenType.ALIAS)
            if not self._match_set(self.DDL_SELECT_TOKENS, advance=False):
                # exp.Properties.Location.POST_ALIAS
                extend_props(self._parse_properties())

            expression = self._parse_ddl_select()

            if create_token.token_type == TokenType.TABLE:
                # exp.Properties.Location.POST_EXPRESSION
                extend_props(self._parse_properties())

                indexes = []
                while True:
                    index = self._parse_index()

                    # exp.Properties.Location.POST_INDEX
                    extend_props(self._parse_properties())

                    if not index:
                        break
                    else:
                        self._match(TokenType.COMMA)
                        indexes.append(index)
            elif create_token.token_type == TokenType.VIEW:
                if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"):
                    no_schema_binding = True

            if self._match_text_seq("CLONE"):
                clone = self._parse_table(schema=True)
                when = self._match_texts({"AT", "BEFORE"}) and self._prev.text.upper()
                clone_kind = (
                    self._match(TokenType.L_PAREN)
                    and self._match_texts(self.CLONE_KINDS)
                    and self._prev.text.upper()
                )
                clone_expression = self._match(TokenType.FARROW) and self._parse_bitwise()
                self._match(TokenType.R_PAREN)
                clone = self.expression(
                    exp.Clone, this=clone, when=when, kind=clone_kind, expression=clone_expression
                )

        return self.expression(
            exp.Create,
            this=this,
            kind=create_token.text,
            replace=replace,
            unique=unique,
            expression=expression,
            exists=exists,
            properties=properties,
            indexes=indexes,
            no_schema_binding=no_schema_binding,
            begin=begin,
            clone=clone,
        )

    def _parse_property_before(self) -> t.Optional[exp.Expression]:
        # only used for teradata currently
        self._match(TokenType.COMMA)

        # Optional modifier keywords that may precede the property name.
        kwargs = {
            "no": self._match_text_seq("NO"),
            "dual": self._match_text_seq("DUAL"),
            "before": self._match_text_seq("BEFORE"),
            "default": self._match_text_seq("DEFAULT"),
            "local": (self._match_text_seq("LOCAL") and "LOCAL")
            or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"),
            "after": self._match_text_seq("AFTER"),
            "minimum": self._match_texts(("MIN", "MINIMUM")),
            "maximum": self._match_texts(("MAX", "MAXIMUM")),
        }

        if self._match_texts(self.PROPERTY_PARSERS):
            parser = self.PROPERTY_PARSERS[self._prev.text.upper()]
            try:
                # Only forward the modifiers that actually matched.
                return parser(self, **{k: v for k, v in kwargs.items() if v})
            except TypeError:
                self.raise_error(f"Cannot parse property '{self._prev.text}'")

        return None

    def _parse_property(self) -> t.Optional[exp.Expression]:
        if self._match_texts(self.PROPERTY_PARSERS):
            return self.PROPERTY_PARSERS[self._prev.text.upper()](self)

        if self._match_pair(TokenType.DEFAULT,
                            TokenType.CHARACTER_SET):
            return self._parse_character_set(default=True)

        if self._match_text_seq("COMPOUND", "SORTKEY"):
            return self._parse_sortkey(compound=True)

        if self._match_text_seq("SQL", "SECURITY"):
            return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER"))

        # Generic `key = value` property assignment (identifier or string key).
        assignment = self._match_pair(
            TokenType.VAR, TokenType.EQ, advance=False
        ) or self._match_pair(TokenType.STRING, TokenType.EQ, advance=False)

        if assignment:
            key = self._parse_var_or_string()
            self._match(TokenType.EQ)
            return self.expression(exp.Property, this=key, value=self._parse_column())

        return None

    def _parse_stored(self) -> exp.FileFormatProperty:
        # STORED AS <format> or STORED AS INPUTFORMAT '<fmt>' OUTPUTFORMAT '<fmt>'.
        self._match(TokenType.ALIAS)

        input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None
        output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None

        return self.expression(
            exp.FileFormatProperty,
            this=self.expression(
                exp.InputOutputFormat, input_format=input_format, output_format=output_format
            )
            if input_format or output_format
            else self._parse_var_or_string() or self._parse_number() or self._parse_id_var(),
        )

    def _parse_property_assignment(self, exp_class: t.Type[E]) -> E:
        # Consumes an optional `=` or `AS` and wraps the following field in exp_class.
        self._match(TokenType.EQ)
        self._match(TokenType.ALIAS)
        return self.expression(exp_class, this=self._parse_field())

    def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]:
        # Collects consecutive properties until one fails to parse.
        properties = []
        while True:
            if before:
                prop = self._parse_property_before()
            else:
                prop = self._parse_property()

            if not prop:
                break
            for p in ensure_list(prop):
                properties.append(p)

        if properties:
            return self.expression(exp.Properties, expressions=properties)

        return None

    def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty:
        return self.expression(
            exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION")
        )

    def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty:
        # Disambiguates VOLATILE: when preceded (two tokens back) by a PRE_VOLATILE
        # token it's a table property, otherwise a function stability marker.
        if self._index >= 2:
            pre_volatile_token = self._tokens[self._index - 2]
        else:
            pre_volatile_token = None

        if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS:
            return exp.VolatileProperty()

        return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE"))

    def _parse_with_property(
        self,
    ) -> t.Optional[exp.Expression] | t.List[t.Optional[exp.Expression]]:
        # WITH (...) property list, or one of the Teradata-style WITH variants.
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_property)

        if self._match_text_seq("JOURNAL"):
            return self._parse_withjournaltable()

        if self._match_text_seq("DATA"):
            return self._parse_withdata(no=False)
        elif self._match_text_seq("NO", "DATA"):
            return self._parse_withdata(no=True)

        if not self._next:
            return None

        return self._parse_withisolatedloading()

    # https://dev.mysql.com/doc/refman/8.0/en/create-view.html
    def _parse_definer(self) -> t.Optional[exp.DefinerProperty]:
        self._match(TokenType.EQ)

        user = self._parse_id_var()
        self._match(TokenType.PARAMETER)
        host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text)

        if not user or not host:
            return None

        return exp.DefinerProperty(this=f"{user}@{host}")

    def _parse_withjournaltable(self) -> exp.WithJournalTableProperty:
        self._match(TokenType.TABLE)
        self._match(TokenType.EQ)
        return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts())

    def _parse_log(self, no: bool = False) -> exp.LogProperty:
        return self.expression(exp.LogProperty, no=no)

    def _parse_journal(self, **kwargs) -> exp.JournalProperty:
        return self.expression(exp.JournalProperty, **kwargs)

    def _parse_checksum(self) -> exp.ChecksumProperty:
        # CHECKSUM = ON | OFF | DEFAULT; `on` stays None if neither ON nor OFF matched.
        self._match(TokenType.EQ)

        on = None
        if self._match(TokenType.ON):
            on = True
        elif self._match_text_seq("OFF"):
            on = False

        return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT))

    def _parse_cluster(self) -> exp.Cluster:
        return self.expression(exp.Cluster, expressions=self._parse_csv(self._parse_ordered))

    def _parse_clustered_by(self) -> exp.ClusteredByProperty:
        # CLUSTERED BY (cols) [SORTED BY (ordered cols)] INTO <n> BUCKETS
        self._match_text_seq("BY")

        self._match_l_paren()
        expressions = self._parse_csv(self._parse_column)
        self._match_r_paren()

        if self._match_text_seq("SORTED", "BY"):
            self._match_l_paren()
            sorted_by = self._parse_csv(self._parse_ordered)
            self._match_r_paren()
        else:
            sorted_by = None

        self._match(TokenType.INTO)
        buckets = self._parse_number()
        self._match_text_seq("BUCKETS")

        return self.expression(
            exp.ClusteredByProperty,
            expressions=expressions,
            sorted_by=sorted_by,
            buckets=buckets,
        )

    def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]:
        if not self._match_text_seq("GRANTS"):
            # Not COPY GRANTS: undo the already-consumed COPY token.
            self._retreat(self._index - 1)
            return None

        return self.expression(exp.CopyGrantsProperty)

    def _parse_freespace(self) -> exp.FreespaceProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT)
        )

    def _parse_mergeblockratio(
        self, no: bool = False, default: bool = False
    ) -> exp.MergeBlockRatioProperty:
        if self._match(TokenType.EQ):
            return self.expression(
                exp.MergeBlockRatioProperty,
                this=self._parse_number(),
                percent=self._match(TokenType.PERCENT),
            )

        return self.expression(exp.MergeBlockRatioProperty, no=no, default=default)

    def _parse_datablocksize(
        self,
        default: t.Optional[bool] = None,
        minimum: t.Optional[bool] = None,
        maximum: t.Optional[bool] = None,
    ) -> exp.DataBlocksizeProperty:
        self._match(TokenType.EQ)
        size = self._parse_number()

        units = None
        if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")):
            units = self._prev.text

        return self.expression(
            exp.DataBlocksizeProperty,
            size=size,
            units=units,
            default=default,
            minimum=minimum,
            maximum=maximum,
        )

    def _parse_blockcompression(self) -> exp.BlockCompressionProperty:
        self._match(TokenType.EQ)
        always = self._match_text_seq("ALWAYS")
        manual = self._match_text_seq("MANUAL")
        never = self._match_text_seq("NEVER")
        default = self._match_text_seq("DEFAULT")

        autotemp = None
        if self._match_text_seq("AUTOTEMP"):
            autotemp = self._parse_schema()

        return self.expression(
            exp.BlockCompressionProperty,
            always=always,
            manual=manual,
            never=never,
            default=default,
            autotemp=autotemp,
        )

    def _parse_withisolatedloading(self) -> exp.IsolatedLoadingProperty:
        # [NO] [CONCURRENT] ISOLATED LOADING [FOR ALL | FOR INSERT | FOR NONE]
        no = self._match_text_seq("NO")
        concurrent = self._match_text_seq("CONCURRENT")
        self._match_text_seq("ISOLATED", "LOADING")
        for_all = self._match_text_seq("FOR", "ALL")
        for_insert = self._match_text_seq("FOR", "INSERT")
        for_none = self._match_text_seq("FOR", "NONE")
        return self.expression(
            exp.IsolatedLoadingProperty,
            no=no,
            concurrent=concurrent,
            for_all=for_all,
            for_insert=for_insert,
            for_none=for_none,
        )

    def _parse_locking(self) -> exp.LockingProperty:
        if self._match(TokenType.TABLE):
            kind = "TABLE"
        elif self._match(TokenType.VIEW):
            kind = "VIEW"
        elif self._match(TokenType.ROW):
            kind = "ROW"
        elif self._match_text_seq("DATABASE"):
            kind = "DATABASE"
        else:
            kind = None

        # ROW locking has no named target; the others name a database/table/view.
        if kind in ("DATABASE", "TABLE", "VIEW"):
            this = self._parse_table_parts()
        else:
            this = None

        if self._match(TokenType.FOR):
            for_or_in = "FOR"
        elif self._match(TokenType.IN):
            for_or_in = "IN"
        else:
            for_or_in = None

        if self._match_text_seq("ACCESS"):
            lock_type = "ACCESS"
        elif self._match_texts(("EXCL", "EXCLUSIVE")):
            lock_type = "EXCLUSIVE"
        elif self._match_text_seq("SHARE"):
            lock_type = "SHARE"
        elif self._match_text_seq("READ"):
            lock_type = "READ"
        elif self._match_text_seq("WRITE"):
            lock_type = "WRITE"
        elif self._match_text_seq("CHECKSUM"):
            lock_type = "CHECKSUM"
        else:
            lock_type = None

        override = self._match_text_seq("OVERRIDE")

        return self.expression(
            exp.LockingProperty,
            this=this,
            kind=kind,
            for_or_in=for_or_in,
            lock_type=lock_type,
            override=override,
        )

    def _parse_partition_by(self) -> t.List[t.Optional[exp.Expression]]:
        if self._match(TokenType.PARTITION_BY):
            return self._parse_csv(self._parse_conjunction)
        return []

    def _parse_partitioned_by(self) -> exp.PartitionedByProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.PartitionedByProperty,
            this=self._parse_schema() or self._parse_bracket(self._parse_field()),
        )

    def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty:
        # [AND [NO] STATISTICS] suffix; `statistics` stays None if absent.
        if self._match_text_seq("AND", "STATISTICS"):
            statistics = True
        elif self._match_text_seq("AND", "NO", "STATISTICS"):
            statistics = False
        else:
            statistics = None

        return self.expression(exp.WithDataProperty, no=no, statistics=statistics)

    def _parse_no_property(self) -> t.Optional[exp.NoPrimaryIndexProperty]:
        if self._match_text_seq("PRIMARY", "INDEX"):
            return exp.NoPrimaryIndexProperty()
        return None

    def _parse_on_property(self) -> t.Optional[exp.Expression]:
        if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"):
            return exp.OnCommitProperty()
        elif self._match_text_seq("COMMIT", "DELETE", "ROWS"):
            return exp.OnCommitProperty(delete=True)
        return None

    def _parse_distkey(self) -> exp.DistKeyProperty:
        return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var))

    def _parse_create_like(self) -> t.Optional[exp.LikeProperty]:
        # LIKE <table> [INCLUDING | EXCLUDING <option>]...
        table = self._parse_table(schema=True)

        options = []
        while self._match_texts(("INCLUDING", "EXCLUDING")):
            this = self._prev.text.upper()

            id_var = self._parse_id_var()
            if not id_var:
                return None

            options.append(
                self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper()))
            )

        return self.expression(exp.LikeProperty, this=table, expressions=options)

    def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty:
        return self.expression(
            exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound
        )

    def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default
        )

    def _parse_returns(self) -> exp.ReturnsProperty:
        # RETURNS <type> | RETURNS TABLE (<schema>) | RETURNS TABLE <...>
        value: t.Optional[exp.Expression]
        is_table = self._match(TokenType.TABLE)

        if is_table:
            if self._match(TokenType.LT):
                value = self.expression(
                    exp.Schema,
                    this="TABLE",
                    expressions=self._parse_csv(self._parse_struct_types),
                )
                if not self._match(TokenType.GT):
                    self.raise_error("Expecting >")
            else:
                value = self._parse_schema(exp.var("TABLE"))
        else:
            value = self._parse_types()

        return self.expression(exp.ReturnsProperty, this=value, is_table=is_table)

    def _parse_describe(self) -> exp.Describe:
        kind = self._match_set(self.CREATABLES) and self._prev.text
        this = self._parse_table()
        return self.expression(exp.Describe, this=this, kind=kind)

    def _parse_insert(self) -> exp.Insert:
        comments = ensure_list(self._prev_comments)
        overwrite = self._match(TokenType.OVERWRITE)
        ignore = self._match(TokenType.IGNORE)
        local = self._match_text_seq("LOCAL")
        alternative = None

        if self._match_text_seq("DIRECTORY"):
            # Hive-style INSERT [OVERWRITE] [LOCAL] DIRECTORY '<path>' ...
            this: t.Optional[exp.Expression] = self.expression(
                exp.Directory,
                this=self._parse_var_or_string(),
                local=local,
                row_format=self._parse_row_format(match_row=True),
            )
        else:
            if self._match(TokenType.OR):
                # e.g. INSERT OR REPLACE / OR IGNORE ... alternatives.
                alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text

            self._match(TokenType.INTO)
            comments += ensure_list(self._prev_comments)
            self._match(TokenType.TABLE)
            this = self._parse_table(schema=True)

        returning = self._parse_returning()

        return self.expression(
            exp.Insert,
            comments=comments,
            this=this,
            exists=self._parse_exists(),
            partition=self._parse_partition(),
            where=self._match_pair(TokenType.REPLACE, TokenType.WHERE)
            and self._parse_conjunction(),
            expression=self._parse_ddl_select(),
            conflict=self._parse_on_conflict(),
            returning=returning or self._parse_returning(),
            overwrite=overwrite,
            alternative=alternative,
            ignore=ignore,
        )

    def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]:
        # Handles both ON CONFLICT (Postgres style) and ON DUPLICATE KEY (MySQL style).
        conflict = self._match_text_seq("ON", "CONFLICT")
        duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY")

        if not conflict and not duplicate:
            return None

        nothing = None
        expressions = None
        key = None
        constraint = None

        if conflict:
            if self._match_text_seq("ON", "CONSTRAINT"):
                constraint = self._parse_id_var()
            else:
                key = self._parse_csv(self._parse_value)

        self._match_text_seq("DO")
        if self._match_text_seq("NOTHING"):
            nothing = True
        else:
            self._match(TokenType.UPDATE)
            self._match(TokenType.SET)
            expressions = self._parse_csv(self._parse_equality)

        return self.expression(
            exp.OnConflict,
            duplicate=duplicate,
            expressions=expressions,
            nothing=nothing,
            key=key,
            constraint=constraint,
        )

    def _parse_returning(self) -> t.Optional[exp.Returning]:
        if not self._match(TokenType.RETURNING):
            return None
        return self.expression(
            exp.Returning,
            expressions=self._parse_csv(self._parse_expression),
            into=self._match(TokenType.INTO) and self._parse_table_part(),
        )

    def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]:
        if not self._match(TokenType.FORMAT):
            return None
        return self._parse_row_format()

    def _parse_row_format(
        self, match_row: bool = False
    ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]:
        # Hive ROW FORMAT SERDE '<class>' [WITH SERDEPROPERTIES (...)]
        # or ROW FORMAT DELIMITED [FIELDS/ESCAPED/COLLECTION/MAP/LINES/NULL clauses].
        if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT):
            return None

        if self._match_text_seq("SERDE"):
            this = self._parse_string()

            serde_properties = None
            if self._match(TokenType.SERDE_PROPERTIES):
                serde_properties = self.expression(
                    exp.SerdeProperties, expressions=self._parse_wrapped_csv(self._parse_property)
                )

            return self.expression(
                exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties
            )

        self._match_text_seq("DELIMITED")

        kwargs = {}

        if self._match_text_seq("FIELDS", "TERMINATED", "BY"):
            kwargs["fields"] = self._parse_string()
        if self._match_text_seq("ESCAPED", "BY"):
            kwargs["escaped"] = \
                self._parse_string()
        if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"):
            kwargs["collection_items"] = self._parse_string()
        if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"):
            kwargs["map_keys"] = self._parse_string()
        if self._match_text_seq("LINES", "TERMINATED", "BY"):
            kwargs["lines"] = self._parse_string()
        if self._match_text_seq("NULL", "DEFINED", "AS"):
            kwargs["null"] = self._parse_string()

        return self.expression(exp.RowFormatDelimitedProperty, **kwargs)  # type: ignore

    def _parse_load(self) -> exp.LoadData | exp.Command:
        # Hive LOAD DATA [LOCAL] INPATH '<path>' [OVERWRITE] INTO TABLE <table> ...
        if self._match_text_seq("DATA"):
            local = self._match_text_seq("LOCAL")
            self._match_text_seq("INPATH")
            inpath = self._parse_string()
            overwrite = self._match(TokenType.OVERWRITE)
            self._match_pair(TokenType.INTO, TokenType.TABLE)

            return self.expression(
                exp.LoadData,
                this=self._parse_table(schema=True),
                local=local,
                overwrite=overwrite,
                inpath=inpath,
                partition=self._parse_partition(),
                input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(),
                serde=self._match_text_seq("SERDE") and self._parse_string(),
            )
        return self._parse_as_command(self._prev)

    def _parse_delete(self) -> exp.Delete:
        # This handles MySQL's "Multiple-Table Syntax"
        # https://dev.mysql.com/doc/refman/8.0/en/delete.html
        tables = None
        comments = self._prev_comments
        if not self._match(TokenType.FROM, advance=False):
            tables = self._parse_csv(self._parse_table) or None

        returning = self._parse_returning()

        return self.expression(
            exp.Delete,
            comments=comments,
            tables=tables,
            this=self._match(TokenType.FROM) and self._parse_table(joins=True),
            using=self._match(TokenType.USING) and self._parse_table(joins=True),
            where=self._parse_where(),
            returning=returning or self._parse_returning(),
            limit=self._parse_limit(),
        )

    def _parse_update(self) -> exp.Update:
        comments = self._prev_comments
        this = self._parse_table(alias_tokens=self.UPDATE_ALIAS_TOKENS)
        expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality)
        returning = self._parse_returning()
        return self.expression(
            exp.Update,
            comments=comments,
            **{  # type: ignore
                "this": this,
                "expressions": expressions,
                "from": self._parse_from(joins=True),
                "where": self._parse_where(),
                "returning": returning or self._parse_returning(),
                "limit": self._parse_limit(),
            },
        )

    def _parse_uncache(self) -> exp.Uncache:
        if not self._match(TokenType.TABLE):
            self.raise_error("Expecting TABLE after UNCACHE")

        return self.expression(
            exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True)
        )

    def _parse_cache(self) -> exp.Cache:
        # Spark CACHE [LAZY] TABLE <table> [OPTIONS ('k' = 'v')] [AS <select>]
        lazy = self._match_text_seq("LAZY")
        self._match(TokenType.TABLE)
        table = self._parse_table(schema=True)

        options = []
        if self._match_text_seq("OPTIONS"):
            self._match_l_paren()
            k = self._parse_string()
            self._match(TokenType.EQ)
            v = self._parse_string()
            options = [k, v]
            self._match_r_paren()

        self._match(TokenType.ALIAS)
        return self.expression(
            exp.Cache,
            this=table,
            lazy=lazy,
            options=options,
            expression=self._parse_select(nested=True),
        )

    def _parse_partition(self) -> t.Optional[exp.Partition]:
        if not self._match(TokenType.PARTITION):
            return None

        return self.expression(
            exp.Partition, expressions=self._parse_wrapped_csv(self._parse_conjunction)
        )

    def _parse_value(self) -> exp.Tuple:
        if self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(self._parse_conjunction)
            self._match_r_paren()
            return self.expression(exp.Tuple, expressions=expressions)

        # In presto we can have VALUES 1, 2 which results in 1 column & 2 rows.
        # https://prestodb.io/docs/current/sql/values.html
        return self.expression(exp.Tuple, expressions=[self._parse_conjunction()])

    def _parse_select(
        self, nested: bool = False, table: bool = False, parse_subquery_alias: bool = True
    ) -> t.Optional[exp.Expression]:
        cte = self._parse_with()
        if cte:
            this = self._parse_statement()

            if not this:
                self.raise_error("Failed to parse any statement following CTE")
                return cte

            if "with" in this.arg_types:
                this.set("with", cte)
            else:
                self.raise_error(f"{this.key} does not support CTE")
                this = cte
        elif self._match(TokenType.SELECT):
            comments = self._prev_comments

            hint = self._parse_hint()
            all_ = self._match(TokenType.ALL)
            distinct = self._match(TokenType.DISTINCT)

            # BigQuery SELECT AS STRUCT / AS VALUE.
            kind = (
                self._match(TokenType.ALIAS)
                and self._match_texts(("STRUCT", "VALUE"))
                and self._prev.text
            )

            if distinct:
                distinct = self.expression(
                    exp.Distinct,
                    on=self._parse_value() if self._match(TokenType.ON) else None,
                )

            if all_ and distinct:
                self.raise_error("Cannot specify both ALL and DISTINCT after SELECT")

            limit = self._parse_limit(top=True)
            expressions = self._parse_expressions()

            this = self.expression(
                exp.Select,
                kind=kind,
                hint=hint,
                distinct=distinct,
                expressions=expressions,
                limit=limit,
            )
            this.comments = comments

            into = self._parse_into()
            if into:
                this.set("into", into)

            from_ = self._parse_from()
            if from_:
                this.set("from", from_)

            this = self._parse_query_modifiers(this)
        elif (table or nested) and self._match(TokenType.L_PAREN):
            if self._match(TokenType.PIVOT):
                this = self._parse_simplified_pivot()
            elif self._match(TokenType.FROM):
                # (FROM <table>) shorthand: treat as SELECT * FROM <table>.
                this = exp.select("*").from_(
                    t.cast(exp.From, self._parse_from(skip_from_token=True))
                )
            else:
                this = self._parse_table() if table else self._parse_select(nested=True)
                this = self._parse_set_operations(self._parse_query_modifiers(this))

            self._match_r_paren()

            # We return early here so that the UNION isn't attached to the subquery by the
            # following call to _parse_set_operations, but instead becomes the parent node
            return self._parse_subquery(this, parse_alias=parse_subquery_alias)
        elif self._match(TokenType.VALUES):
            this = self.expression(
                exp.Values,
                expressions=self._parse_csv(self._parse_value),
                alias=self._parse_table_alias(),
            )
        else:
            this = None

        return self._parse_set_operations(this)

    def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]:
        if not skip_with_token and not self._match(TokenType.WITH):
            return None

        comments = self._prev_comments
        recursive = self._match(TokenType.RECURSIVE)

        expressions = []
        while True:
            expressions.append(self._parse_cte())

            if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH):
                break
            else:
                # Tolerate a redundant WITH between CTEs.
                self._match(TokenType.WITH)

        return self.expression(
            exp.With, comments=comments, expressions=expressions, recursive=recursive
        )

    def _parse_cte(self) -> exp.CTE:
        alias = self._parse_table_alias()
        if not alias or not alias.this:
            self.raise_error("Expected CTE to have alias")

        self._match(TokenType.ALIAS)
        return self.expression(
            exp.CTE, this=self._parse_wrapped(self._parse_statement), alias=alias
        )

    def _parse_table_alias(
        self, alias_tokens: t.Optional[t.Collection[TokenType]] = None
    ) -> t.Optional[exp.TableAlias]:
        any_token = self._match(TokenType.ALIAS)
        alias = (
    def _parse_query_modifiers(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        """Attach trailing modifiers (joins, laterals, and keyword-driven clauses
        such as WHERE/GROUP BY/ORDER BY/LIMIT) to a modifiable expression.

        Non-modifiable expressions are returned unchanged.
        """
        if isinstance(this, self.MODIFIABLES):
            # Greedily consume all JOINs and LATERALs first; iter(fn, None)
            # keeps calling until the parser returns None.
            for join in iter(self._parse_join, None):
                this.append("joins", join)
            for lateral in iter(self._parse_lateral, None):
                this.append("laterals", lateral)

            while True:
                if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False):
                    parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type]
                    key, expression = parser(self)

                    if expression:
                        this.set(key, expression)
                        if key == "limit":
                            # `LIMIT x, y` packs the offset into the Limit node;
                            # hoist it out into a proper Offset arg.
                            offset = expression.args.pop("offset", None)
                            if offset:
                                this.set("offset", exp.Offset(expression=offset))
                        continue
                break
        return this
expressions=hints) 2122 2123 return None 2124 2125 def _parse_into(self) -> t.Optional[exp.Into]: 2126 if not self._match(TokenType.INTO): 2127 return None 2128 2129 temp = self._match(TokenType.TEMPORARY) 2130 unlogged = self._match_text_seq("UNLOGGED") 2131 self._match(TokenType.TABLE) 2132 2133 return self.expression( 2134 exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged 2135 ) 2136 2137 def _parse_from( 2138 self, joins: bool = False, skip_from_token: bool = False 2139 ) -> t.Optional[exp.From]: 2140 if not skip_from_token and not self._match(TokenType.FROM): 2141 return None 2142 2143 return self.expression( 2144 exp.From, comments=self._prev_comments, this=self._parse_table(joins=joins) 2145 ) 2146 2147 def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]: 2148 if not self._match(TokenType.MATCH_RECOGNIZE): 2149 return None 2150 2151 self._match_l_paren() 2152 2153 partition = self._parse_partition_by() 2154 order = self._parse_order() 2155 measures = self._parse_expressions() if self._match_text_seq("MEASURES") else None 2156 2157 if self._match_text_seq("ONE", "ROW", "PER", "MATCH"): 2158 rows = exp.var("ONE ROW PER MATCH") 2159 elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"): 2160 text = "ALL ROWS PER MATCH" 2161 if self._match_text_seq("SHOW", "EMPTY", "MATCHES"): 2162 text += f" SHOW EMPTY MATCHES" 2163 elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"): 2164 text += f" OMIT EMPTY MATCHES" 2165 elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"): 2166 text += f" WITH UNMATCHED ROWS" 2167 rows = exp.var(text) 2168 else: 2169 rows = None 2170 2171 if self._match_text_seq("AFTER", "MATCH", "SKIP"): 2172 text = "AFTER MATCH SKIP" 2173 if self._match_text_seq("PAST", "LAST", "ROW"): 2174 text += f" PAST LAST ROW" 2175 elif self._match_text_seq("TO", "NEXT", "ROW"): 2176 text += f" TO NEXT ROW" 2177 elif self._match_text_seq("TO", "FIRST"): 2178 text += f" TO FIRST {self._advance_any().text}" # type: 
ignore 2179 elif self._match_text_seq("TO", "LAST"): 2180 text += f" TO LAST {self._advance_any().text}" # type: ignore 2181 after = exp.var(text) 2182 else: 2183 after = None 2184 2185 if self._match_text_seq("PATTERN"): 2186 self._match_l_paren() 2187 2188 if not self._curr: 2189 self.raise_error("Expecting )", self._curr) 2190 2191 paren = 1 2192 start = self._curr 2193 2194 while self._curr and paren > 0: 2195 if self._curr.token_type == TokenType.L_PAREN: 2196 paren += 1 2197 if self._curr.token_type == TokenType.R_PAREN: 2198 paren -= 1 2199 2200 end = self._prev 2201 self._advance() 2202 2203 if paren > 0: 2204 self.raise_error("Expecting )", self._curr) 2205 2206 pattern = exp.var(self._find_sql(start, end)) 2207 else: 2208 pattern = None 2209 2210 define = ( 2211 self._parse_csv( 2212 lambda: self.expression( 2213 exp.Alias, 2214 alias=self._parse_id_var(any_token=True), 2215 this=self._match(TokenType.ALIAS) and self._parse_conjunction(), 2216 ) 2217 ) 2218 if self._match_text_seq("DEFINE") 2219 else None 2220 ) 2221 2222 self._match_r_paren() 2223 2224 return self.expression( 2225 exp.MatchRecognize, 2226 partition_by=partition, 2227 order=order, 2228 measures=measures, 2229 rows=rows, 2230 after=after, 2231 pattern=pattern, 2232 define=define, 2233 alias=self._parse_table_alias(), 2234 ) 2235 2236 def _parse_lateral(self) -> t.Optional[exp.Lateral]: 2237 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY) 2238 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY) 2239 2240 if outer_apply or cross_apply: 2241 this = self._parse_select(table=True) 2242 view = None 2243 outer = not cross_apply 2244 elif self._match(TokenType.LATERAL): 2245 this = self._parse_select(table=True) 2246 view = self._match(TokenType.VIEW) 2247 outer = self._match(TokenType.OUTER) 2248 else: 2249 return None 2250 2251 if not this: 2252 this = ( 2253 self._parse_unnest() 2254 or self._parse_function() 2255 or self._parse_id_var(any_token=False) 2256 ) 2257 
2258 while self._match(TokenType.DOT): 2259 this = exp.Dot( 2260 this=this, 2261 expression=self._parse_function() or self._parse_id_var(any_token=False), 2262 ) 2263 2264 if view: 2265 table = self._parse_id_var(any_token=False) 2266 columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else [] 2267 table_alias: t.Optional[exp.TableAlias] = self.expression( 2268 exp.TableAlias, this=table, columns=columns 2269 ) 2270 elif isinstance(this, exp.Subquery) and this.alias: 2271 # Ensures parity between the Subquery's and the Lateral's "alias" args 2272 table_alias = this.args["alias"].copy() 2273 else: 2274 table_alias = self._parse_table_alias() 2275 2276 return self.expression(exp.Lateral, this=this, view=view, outer=outer, alias=table_alias) 2277 2278 def _parse_join_parts( 2279 self, 2280 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 2281 return ( 2282 self._match_set(self.JOIN_METHODS) and self._prev, 2283 self._match_set(self.JOIN_SIDES) and self._prev, 2284 self._match_set(self.JOIN_KINDS) and self._prev, 2285 ) 2286 2287 def _parse_join( 2288 self, skip_join_token: bool = False, parse_bracket: bool = False 2289 ) -> t.Optional[exp.Join]: 2290 if self._match(TokenType.COMMA): 2291 return self.expression(exp.Join, this=self._parse_table()) 2292 2293 index = self._index 2294 method, side, kind = self._parse_join_parts() 2295 hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None 2296 join = self._match(TokenType.JOIN) 2297 2298 if not skip_join_token and not join: 2299 self._retreat(index) 2300 kind = None 2301 method = None 2302 side = None 2303 2304 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False) 2305 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False) 2306 2307 if not skip_join_token and not join and not outer_apply and not cross_apply: 2308 return None 2309 2310 if outer_apply: 2311 side = Token(TokenType.LEFT, "LEFT") 2312 2313 kwargs: t.Dict[str, 
t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)} 2314 2315 if method: 2316 kwargs["method"] = method.text 2317 if side: 2318 kwargs["side"] = side.text 2319 if kind: 2320 kwargs["kind"] = kind.text 2321 if hint: 2322 kwargs["hint"] = hint 2323 2324 if self._match(TokenType.ON): 2325 kwargs["on"] = self._parse_conjunction() 2326 elif self._match(TokenType.USING): 2327 kwargs["using"] = self._parse_wrapped_id_vars() 2328 elif not (kind and kind.token_type == TokenType.CROSS): 2329 index = self._index 2330 joins = self._parse_joins() 2331 2332 if joins and self._match(TokenType.ON): 2333 kwargs["on"] = self._parse_conjunction() 2334 elif joins and self._match(TokenType.USING): 2335 kwargs["using"] = self._parse_wrapped_id_vars() 2336 else: 2337 joins = None 2338 self._retreat(index) 2339 2340 kwargs["this"].set("joins", joins) 2341 2342 return self.expression(exp.Join, **kwargs) 2343 2344 def _parse_index( 2345 self, 2346 index: t.Optional[exp.Expression] = None, 2347 ) -> t.Optional[exp.Index]: 2348 if index: 2349 unique = None 2350 primary = None 2351 amp = None 2352 2353 self._match(TokenType.ON) 2354 self._match(TokenType.TABLE) # hive 2355 table = self._parse_table_parts(schema=True) 2356 else: 2357 unique = self._match(TokenType.UNIQUE) 2358 primary = self._match_text_seq("PRIMARY") 2359 amp = self._match_text_seq("AMP") 2360 2361 if not self._match(TokenType.INDEX): 2362 return None 2363 2364 index = self._parse_id_var() 2365 table = None 2366 2367 using = self._parse_field() if self._match(TokenType.USING) else None 2368 2369 if self._match(TokenType.L_PAREN, advance=False): 2370 columns = self._parse_wrapped_csv(self._parse_ordered) 2371 else: 2372 columns = None 2373 2374 return self.expression( 2375 exp.Index, 2376 this=index, 2377 table=table, 2378 using=using, 2379 columns=columns, 2380 unique=unique, 2381 primary=primary, 2382 amp=amp, 2383 partition_by=self._parse_partition_by(), 2384 ) 2385 2386 def _parse_table_hints(self) -> 
    def _parse_table(
        self,
        schema: bool = False,
        joins: bool = False,
        alias_tokens: t.Optional[t.Collection[TokenType]] = None,
        parse_bracket: bool = False,
    ) -> t.Optional[exp.Expression]:
        """Parse a table factor: lateral, UNNEST, derived VALUES, subquery,
        bracket expression, or a (possibly dotted) table reference, then its
        optional alias, pivots, hints, sample and joins.

        Args:
            schema: parse the result as a schema (column list) target.
            joins: also greedily consume trailing joins.
            alias_tokens: token types allowed as alias names (defaults to
                TABLE_ALIAS_TOKENS).
            parse_bracket: allow a bracket construct in table position.
        """
        # Each of these alternatives consumes tokens only if it matches,
        # so the order below defines parse precedence.
        lateral = self._parse_lateral()
        if lateral:
            return lateral

        unnest = self._parse_unnest()
        if unnest:
            return unnest

        values = self._parse_derived_table_values()
        if values:
            return values

        subquery = self._parse_select(table=True)
        if subquery:
            if not subquery.args.get("pivots"):
                subquery.set("pivots", self._parse_pivots())
            return subquery

        bracket = parse_bracket and self._parse_bracket(None)
        bracket = self.expression(exp.Table, this=bracket) if bracket else None
        this: exp.Expression = bracket or self._parse_table_parts(schema=schema)

        if schema:
            return self._parse_schema(this=this)

        # Dialect flag: some dialects put TABLESAMPLE before the alias,
        # others after — exactly one of the two _parse_table_sample calls runs.
        if self.ALIAS_POST_TABLESAMPLE:
            table_sample = self._parse_table_sample()

        alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
        if alias:
            this.set("alias", alias)

        if not this.args.get("pivots"):
            this.set("pivots", self._parse_pivots())

        this.set("hints", self._parse_table_hints())

        if not self.ALIAS_POST_TABLESAMPLE:
            table_sample = self._parse_table_sample()

        if table_sample:
            # The sample wraps the table node, becoming the new root.
            table_sample.set("this", this)
            this = table_sample

        if joins:
            for join in iter(self._parse_join, None):
                this.append("joins", join)

        return this
2512 self.raise_error("Unexpected extra column alias in unnest.") 2513 2514 alias.set("columns", [alias.this]) 2515 alias.set("this", None) 2516 2517 offset = None 2518 if self._match_pair(TokenType.WITH, TokenType.OFFSET): 2519 self._match(TokenType.ALIAS) 2520 offset = self._parse_id_var() or exp.to_identifier("offset") 2521 2522 return self.expression( 2523 exp.Unnest, expressions=expressions, ordinality=ordinality, alias=alias, offset=offset 2524 ) 2525 2526 def _parse_derived_table_values(self) -> t.Optional[exp.Values]: 2527 is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES) 2528 if not is_derived and not self._match(TokenType.VALUES): 2529 return None 2530 2531 expressions = self._parse_csv(self._parse_value) 2532 alias = self._parse_table_alias() 2533 2534 if is_derived: 2535 self._match_r_paren() 2536 2537 return self.expression( 2538 exp.Values, expressions=expressions, alias=alias or self._parse_table_alias() 2539 ) 2540 2541 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 2542 if not self._match(TokenType.TABLE_SAMPLE) and not ( 2543 as_modifier and self._match_text_seq("USING", "SAMPLE") 2544 ): 2545 return None 2546 2547 bucket_numerator = None 2548 bucket_denominator = None 2549 bucket_field = None 2550 percent = None 2551 rows = None 2552 size = None 2553 seed = None 2554 2555 kind = ( 2556 self._prev.text if self._prev.token_type == TokenType.TABLE_SAMPLE else "USING SAMPLE" 2557 ) 2558 method = self._parse_var(tokens=(TokenType.ROW,)) 2559 2560 self._match(TokenType.L_PAREN) 2561 2562 num = self._parse_number() 2563 2564 if self._match_text_seq("BUCKET"): 2565 bucket_numerator = self._parse_number() 2566 self._match_text_seq("OUT", "OF") 2567 bucket_denominator = bucket_denominator = self._parse_number() 2568 self._match(TokenType.ON) 2569 bucket_field = self._parse_field() 2570 elif self._match_set((TokenType.PERCENT, TokenType.MOD)): 2571 percent = num 2572 elif 
self._match(TokenType.ROWS): 2573 rows = num 2574 else: 2575 size = num 2576 2577 self._match(TokenType.R_PAREN) 2578 2579 if self._match(TokenType.L_PAREN): 2580 method = self._parse_var() 2581 seed = self._match(TokenType.COMMA) and self._parse_number() 2582 self._match_r_paren() 2583 elif self._match_texts(("SEED", "REPEATABLE")): 2584 seed = self._parse_wrapped(self._parse_number) 2585 2586 return self.expression( 2587 exp.TableSample, 2588 method=method, 2589 bucket_numerator=bucket_numerator, 2590 bucket_denominator=bucket_denominator, 2591 bucket_field=bucket_field, 2592 percent=percent, 2593 rows=rows, 2594 size=size, 2595 seed=seed, 2596 kind=kind, 2597 ) 2598 2599 def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]: 2600 return list(iter(self._parse_pivot, None)) or None 2601 2602 def _parse_joins(self) -> t.Optional[t.List[exp.Join]]: 2603 return list(iter(self._parse_join, None)) or None 2604 2605 # https://duckdb.org/docs/sql/statements/pivot 2606 def _parse_simplified_pivot(self) -> exp.Pivot: 2607 def _parse_on() -> t.Optional[exp.Expression]: 2608 this = self._parse_bitwise() 2609 return self._parse_in(this) if self._match(TokenType.IN) else this 2610 2611 this = self._parse_table() 2612 expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on) 2613 using = self._match(TokenType.USING) and self._parse_csv( 2614 lambda: self._parse_alias(self._parse_function()) 2615 ) 2616 group = self._parse_group() 2617 return self.expression( 2618 exp.Pivot, this=this, expressions=expressions, using=using, group=group 2619 ) 2620 2621 def _parse_pivot(self) -> t.Optional[exp.Pivot]: 2622 index = self._index 2623 2624 if self._match(TokenType.PIVOT): 2625 unpivot = False 2626 elif self._match(TokenType.UNPIVOT): 2627 unpivot = True 2628 else: 2629 return None 2630 2631 expressions = [] 2632 field = None 2633 2634 if not self._match(TokenType.L_PAREN): 2635 self._retreat(index) 2636 return None 2637 2638 if unpivot: 2639 expressions = 
self._parse_csv(self._parse_column) 2640 else: 2641 expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function())) 2642 2643 if not expressions: 2644 self.raise_error("Failed to parse PIVOT's aggregation list") 2645 2646 if not self._match(TokenType.FOR): 2647 self.raise_error("Expecting FOR") 2648 2649 value = self._parse_column() 2650 2651 if not self._match(TokenType.IN): 2652 self.raise_error("Expecting IN") 2653 2654 field = self._parse_in(value, alias=True) 2655 2656 self._match_r_paren() 2657 2658 pivot = self.expression(exp.Pivot, expressions=expressions, field=field, unpivot=unpivot) 2659 2660 if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False): 2661 pivot.set("alias", self._parse_table_alias()) 2662 2663 if not unpivot: 2664 names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions)) 2665 2666 columns: t.List[exp.Expression] = [] 2667 for fld in pivot.args["field"].expressions: 2668 field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name 2669 for name in names: 2670 if self.PREFIXED_PIVOT_COLUMNS: 2671 name = f"{name}_{field_name}" if name else field_name 2672 else: 2673 name = f"{field_name}_{name}" if name else field_name 2674 2675 columns.append(exp.to_identifier(name)) 2676 2677 pivot.set("columns", columns) 2678 2679 return pivot 2680 2681 def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]: 2682 return [agg.alias for agg in aggregations] 2683 2684 def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]: 2685 if not skip_where_token and not self._match(TokenType.WHERE): 2686 return None 2687 2688 return self.expression( 2689 exp.Where, comments=self._prev_comments, this=self._parse_conjunction() 2690 ) 2691 2692 def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]: 2693 if not skip_group_by_token and not self._match(TokenType.GROUP_BY): 2694 return None 2695 2696 elements = 
defaultdict(list) 2697 2698 if self._match(TokenType.ALL): 2699 return self.expression(exp.Group, all=True) 2700 2701 while True: 2702 expressions = self._parse_csv(self._parse_conjunction) 2703 if expressions: 2704 elements["expressions"].extend(expressions) 2705 2706 grouping_sets = self._parse_grouping_sets() 2707 if grouping_sets: 2708 elements["grouping_sets"].extend(grouping_sets) 2709 2710 rollup = None 2711 cube = None 2712 totals = None 2713 2714 with_ = self._match(TokenType.WITH) 2715 if self._match(TokenType.ROLLUP): 2716 rollup = with_ or self._parse_wrapped_csv(self._parse_column) 2717 elements["rollup"].extend(ensure_list(rollup)) 2718 2719 if self._match(TokenType.CUBE): 2720 cube = with_ or self._parse_wrapped_csv(self._parse_column) 2721 elements["cube"].extend(ensure_list(cube)) 2722 2723 if self._match_text_seq("TOTALS"): 2724 totals = True 2725 elements["totals"] = True # type: ignore 2726 2727 if not (grouping_sets or rollup or cube or totals): 2728 break 2729 2730 return self.expression(exp.Group, **elements) # type: ignore 2731 2732 def _parse_grouping_sets(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]: 2733 if not self._match(TokenType.GROUPING_SETS): 2734 return None 2735 2736 return self._parse_wrapped_csv(self._parse_grouping_set) 2737 2738 def _parse_grouping_set(self) -> t.Optional[exp.Expression]: 2739 if self._match(TokenType.L_PAREN): 2740 grouping_set = self._parse_csv(self._parse_column) 2741 self._match_r_paren() 2742 return self.expression(exp.Tuple, expressions=grouping_set) 2743 2744 return self._parse_column() 2745 2746 def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]: 2747 if not skip_having_token and not self._match(TokenType.HAVING): 2748 return None 2749 return self.expression(exp.Having, this=self._parse_conjunction()) 2750 2751 def _parse_qualify(self) -> t.Optional[exp.Qualify]: 2752 if not self._match(TokenType.QUALIFY): 2753 return None 2754 return 
self.expression(exp.Qualify, this=self._parse_conjunction()) 2755 2756 def _parse_order( 2757 self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False 2758 ) -> t.Optional[exp.Expression]: 2759 if not skip_order_token and not self._match(TokenType.ORDER_BY): 2760 return this 2761 2762 return self.expression( 2763 exp.Order, this=this, expressions=self._parse_csv(self._parse_ordered) 2764 ) 2765 2766 def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]: 2767 if not self._match(token): 2768 return None 2769 return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered)) 2770 2771 def _parse_ordered(self) -> exp.Ordered: 2772 this = self._parse_conjunction() 2773 self._match(TokenType.ASC) 2774 2775 is_desc = self._match(TokenType.DESC) 2776 is_nulls_first = self._match_text_seq("NULLS", "FIRST") 2777 is_nulls_last = self._match_text_seq("NULLS", "LAST") 2778 desc = is_desc or False 2779 asc = not desc 2780 nulls_first = is_nulls_first or False 2781 explicitly_null_ordered = is_nulls_first or is_nulls_last 2782 2783 if ( 2784 not explicitly_null_ordered 2785 and ( 2786 (asc and self.NULL_ORDERING == "nulls_are_small") 2787 or (desc and self.NULL_ORDERING != "nulls_are_small") 2788 ) 2789 and self.NULL_ORDERING != "nulls_are_last" 2790 ): 2791 nulls_first = True 2792 2793 return self.expression(exp.Ordered, this=this, desc=desc, nulls_first=nulls_first) 2794 2795 def _parse_limit( 2796 self, this: t.Optional[exp.Expression] = None, top: bool = False 2797 ) -> t.Optional[exp.Expression]: 2798 if self._match(TokenType.TOP if top else TokenType.LIMIT): 2799 comments = self._prev_comments 2800 if top: 2801 limit_paren = self._match(TokenType.L_PAREN) 2802 expression = self._parse_number() 2803 2804 if limit_paren: 2805 self._match_r_paren() 2806 else: 2807 expression = self._parse_term() 2808 2809 if self._match(TokenType.COMMA): 2810 offset = expression 2811 expression = self._parse_term() 2812 else: 2813 
offset = None 2814 2815 limit_exp = self.expression( 2816 exp.Limit, this=this, expression=expression, offset=offset, comments=comments 2817 ) 2818 2819 return limit_exp 2820 2821 if self._match(TokenType.FETCH): 2822 direction = self._match_set((TokenType.FIRST, TokenType.NEXT)) 2823 direction = self._prev.text if direction else "FIRST" 2824 2825 count = self._parse_number() 2826 percent = self._match(TokenType.PERCENT) 2827 2828 self._match_set((TokenType.ROW, TokenType.ROWS)) 2829 2830 only = self._match_text_seq("ONLY") 2831 with_ties = self._match_text_seq("WITH", "TIES") 2832 2833 if only and with_ties: 2834 self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause") 2835 2836 return self.expression( 2837 exp.Fetch, 2838 direction=direction, 2839 count=count, 2840 percent=percent, 2841 with_ties=with_ties, 2842 ) 2843 2844 return this 2845 2846 def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 2847 if not self._match(TokenType.OFFSET): 2848 return this 2849 2850 count = self._parse_term() 2851 self._match_set((TokenType.ROW, TokenType.ROWS)) 2852 return self.expression(exp.Offset, this=this, expression=count) 2853 2854 def _parse_locks(self) -> t.List[exp.Lock]: 2855 locks = [] 2856 while True: 2857 if self._match_text_seq("FOR", "UPDATE"): 2858 update = True 2859 elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq( 2860 "LOCK", "IN", "SHARE", "MODE" 2861 ): 2862 update = False 2863 else: 2864 break 2865 2866 expressions = None 2867 if self._match_text_seq("OF"): 2868 expressions = self._parse_csv(lambda: self._parse_table(schema=True)) 2869 2870 wait: t.Optional[bool | exp.Expression] = None 2871 if self._match_text_seq("NOWAIT"): 2872 wait = True 2873 elif self._match_text_seq("WAIT"): 2874 wait = self._parse_primary() 2875 elif self._match_text_seq("SKIP", "LOCKED"): 2876 wait = False 2877 2878 locks.append( 2879 self.expression(exp.Lock, update=update, expressions=expressions, 
wait=wait) 2880 ) 2881 2882 return locks 2883 2884 def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 2885 if not self._match_set(self.SET_OPERATIONS): 2886 return this 2887 2888 token_type = self._prev.token_type 2889 2890 if token_type == TokenType.UNION: 2891 expression = exp.Union 2892 elif token_type == TokenType.EXCEPT: 2893 expression = exp.Except 2894 else: 2895 expression = exp.Intersect 2896 2897 return self.expression( 2898 expression, 2899 this=this, 2900 distinct=self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL), 2901 expression=self._parse_set_operations(self._parse_select(nested=True)), 2902 ) 2903 2904 def _parse_expression(self) -> t.Optional[exp.Expression]: 2905 return self._parse_alias(self._parse_conjunction()) 2906 2907 def _parse_conjunction(self) -> t.Optional[exp.Expression]: 2908 return self._parse_tokens(self._parse_equality, self.CONJUNCTION) 2909 2910 def _parse_equality(self) -> t.Optional[exp.Expression]: 2911 return self._parse_tokens(self._parse_comparison, self.EQUALITY) 2912 2913 def _parse_comparison(self) -> t.Optional[exp.Expression]: 2914 return self._parse_tokens(self._parse_range, self.COMPARISON) 2915 2916 def _parse_range(self) -> t.Optional[exp.Expression]: 2917 this = self._parse_bitwise() 2918 negate = self._match(TokenType.NOT) 2919 2920 if self._match_set(self.RANGE_PARSERS): 2921 expression = self.RANGE_PARSERS[self._prev.token_type](self, this) 2922 if not expression: 2923 return this 2924 2925 this = expression 2926 elif self._match(TokenType.ISNULL): 2927 this = self.expression(exp.Is, this=this, expression=exp.Null()) 2928 2929 # Postgres supports ISNULL and NOTNULL for conditions. 
2930 # https://blog.andreiavram.ro/postgresql-null-composite-type/ 2931 if self._match(TokenType.NOTNULL): 2932 this = self.expression(exp.Is, this=this, expression=exp.Null()) 2933 this = self.expression(exp.Not, this=this) 2934 2935 if negate: 2936 this = self.expression(exp.Not, this=this) 2937 2938 if self._match(TokenType.IS): 2939 this = self._parse_is(this) 2940 2941 return this 2942 2943 def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 2944 index = self._index - 1 2945 negate = self._match(TokenType.NOT) 2946 2947 if self._match_text_seq("DISTINCT", "FROM"): 2948 klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ 2949 return self.expression(klass, this=this, expression=self._parse_expression()) 2950 2951 expression = self._parse_null() or self._parse_boolean() 2952 if not expression: 2953 self._retreat(index) 2954 return None 2955 2956 this = self.expression(exp.Is, this=this, expression=expression) 2957 return self.expression(exp.Not, this=this) if negate else this 2958 2959 def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In: 2960 unnest = self._parse_unnest(with_alias=False) 2961 if unnest: 2962 this = self.expression(exp.In, this=this, unnest=unnest) 2963 elif self._match(TokenType.L_PAREN): 2964 expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias)) 2965 2966 if len(expressions) == 1 and isinstance(expressions[0], exp.Subqueryable): 2967 this = self.expression(exp.In, this=this, query=expressions[0]) 2968 else: 2969 this = self.expression(exp.In, this=this, expressions=expressions) 2970 2971 self._match_r_paren(this) 2972 else: 2973 this = self.expression(exp.In, this=this, field=self._parse_field()) 2974 2975 return this 2976 2977 def _parse_between(self, this: exp.Expression) -> exp.Between: 2978 low = self._parse_bitwise() 2979 self._match(TokenType.AND) 2980 high = self._parse_bitwise() 2981 return self.expression(exp.Between, this=this, low=low, 
    def _parse_interval(self) -> t.Optional[exp.Interval]:
        """Parse an INTERVAL expression, normalizing it toward the canonical
        form INTERVAL '<value>' <unit> where possible.

        Returns:
            The Interval expression, or None when the keyword is absent.
        """
        if not self._match(TokenType.INTERVAL):
            return None

        # A string literal value goes through primary parsing; anything else
        # (e.g. a number or arithmetic) through term parsing.
        if self._match(TokenType.STRING, advance=False):
            this = self._parse_primary()
        else:
            this = self._parse_term()

        unit = self._parse_function() or self._parse_var()

        # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse
        # each INTERVAL expression into this canonical form so it's easy to transpile
        if this and this.is_number:
            this = exp.Literal.string(this.name)
        elif this and this.is_string:
            parts = this.name.split()

            # A two-part string like '5 day' already contains the unit.
            if len(parts) == 2:
                if unit:
                    # this is not actually a unit, it's something else
                    unit = None
                    self._retreat(self._index - 1)
                else:
                    this = exp.Literal.string(parts[0])
                    unit = self.expression(exp.Var, this=parts[1])

        return self.expression(exp.Interval, this=this, unit=unit)
    def _parse_unary(self) -> t.Optional[exp.Expression]:
        """Parse a unary operator, or fall through to a (possibly typed) expression."""
        if self._match_set(self.UNARY_PARSERS):
            return self.UNARY_PARSERS[self._prev.token_type](self)
        return self._parse_at_time_zone(self._parse_type())

    def _parse_type(self) -> t.Optional[exp.Expression]:
        """Parse an INTERVAL, a typed literal shorthand like DATE '2020-01-01', or a column."""
        interval = self._parse_interval()
        if interval:
            return interval

        index = self._index
        data_type = self._parse_types(check_func=True)
        this = self._parse_column()

        if data_type:
            if isinstance(this, exp.Literal):
                # e.g. DATE '2020-01-01': a dialect may supply a dedicated literal parser,
                # otherwise it becomes a CAST of the literal to the type.
                parser = self.TYPE_LITERAL_PARSERS.get(data_type.this)
                if parser:
                    return parser(self, this, data_type)
                return self.expression(exp.Cast, this=this, to=data_type)
            if not data_type.expressions:
                # A bare type name not followed by a literal was really a column -- rewind.
                self._retreat(index)
                return self._parse_column()
            return self._parse_column_ops(data_type)

        return this

    def _parse_type_size(self) -> t.Optional[exp.DataTypeSize]:
        """Parse a type-size argument such as VARCHAR(10) or FLOAT(8, 4)."""
        this = self._parse_type()
        if not this:
            return None

        return self.expression(
            exp.DataTypeSize, this=this, expression=self._parse_var(any_token=True)
        )

    def _parse_types(
        self, check_func: bool = False, schema: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse a data type (possibly nested / parameterized / array-suffixed).

        Args:
            check_func: when True, back off if the "type" turns out to be a function
                call (i.e. it is followed by a string argument).
            schema: propagated into nested type parsing (schema context).

        Returns:
            An exp.DataType (or exp.Interval / exp.PseudoType), or None with the
            token position rewound if no type could be parsed.
        """
        index = self._index

        # Teradata allows types to be prefixed with SYSUDTLIB.
        prefix = self._match_text_seq("SYSUDTLIB", ".")

        if not self._match_set(self.TYPE_TOKENS):
            return None

        type_token = self._prev.token_type

        if type_token == TokenType.PSEUDO_TYPE:
            return self.expression(exp.PseudoType, this=self._prev.text)

        nested = type_token in self.NESTED_TYPE_TOKENS
        is_struct = type_token == TokenType.STRUCT
        expressions = None
        maybe_func = False

        if self._match(TokenType.L_PAREN):
            if is_struct:
                expressions = self._parse_csv(self._parse_struct_types)
            elif nested:
                expressions = self._parse_csv(
                    lambda: self._parse_types(check_func=check_func, schema=schema)
                )
            elif type_token in self.ENUM_TYPE_TOKENS:
                expressions = self._parse_csv(self._parse_primary)
            else:
                expressions = self._parse_csv(self._parse_type_size)

            if not expressions or not self._match(TokenType.R_PAREN):
                # Not a parameterized type after all -- rewind completely.
                self._retreat(index)
                return None

            # TYPE(...) could still be a function call; checked further below.
            maybe_func = True

        if self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET):
            # Array suffix syntax, e.g. INT[] or INT[][].
            this = exp.DataType(
                this=exp.DataType.Type.ARRAY,
                expressions=[
                    exp.DataType(
                        this=exp.DataType.Type[type_token.value],
                        expressions=expressions,
                        nested=nested,
                    )
                ],
                nested=True,
            )

            while self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET):
                this = exp.DataType(this=exp.DataType.Type.ARRAY, expressions=[this], nested=True)

            return this

        if self._match(TokenType.L_BRACKET):
            # A lone `[` means this was an index expression, not a type.
            self._retreat(index)
            return None

        values: t.Optional[t.List[t.Optional[exp.Expression]]] = None
        if nested and self._match(TokenType.LT):
            # Angle-bracket style nesting, e.g. ARRAY<INT> / STRUCT<a INT>.
            if is_struct:
                expressions = self._parse_csv(self._parse_struct_types)
            else:
                expressions = self._parse_csv(
                    lambda: self._parse_types(check_func=check_func, schema=schema)
                )

            if not self._match(TokenType.GT):
                self.raise_error("Expecting >")

            if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)):
                # Inline constructor values, e.g. ARRAY<INT>[1, 2].
                values = self._parse_csv(self._parse_conjunction)
                self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN))

        value: t.Optional[exp.Expression] = None
        if type_token in self.TIMESTAMPS:
            if self._match_text_seq("WITH", "TIME", "ZONE"):
                maybe_func = False
                value = exp.DataType(this=exp.DataType.Type.TIMESTAMPTZ, expressions=expressions)
            elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"):
                maybe_func = False
                value = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions)
            elif self._match_text_seq("WITHOUT", "TIME", "ZONE"):
                maybe_func = False
        elif type_token == TokenType.INTERVAL:
            unit = self._parse_var()

            if not unit:
                value = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL)
            else:
                value = self.expression(exp.Interval, unit=unit)

        if maybe_func and check_func:
            # If the parenthesized "type" is followed by a string literal, it was
            # actually a function call (e.g. DATE('...')), so give up here.
            index2 = self._index
            peek = self._parse_string()

            if not peek:
                self._retreat(index)
                return None

            self._retreat(index2)

        if value:
            return value

        return exp.DataType(
            this=exp.DataType.Type[type_token.value],
            expressions=expressions,
            nested=nested,
            values=values,
            prefix=prefix,
        )

    def _parse_struct_types(self) -> t.Optional[exp.Expression]:
        """Parse one field of a STRUCT type: `name [:] type [constraints]`."""
        this = self._parse_type() or self._parse_id_var()
        self._match(TokenType.COLON)
        return self._parse_column_def(this)

    def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Wrap `this` in AT TIME ZONE <zone> if that clause follows."""
        if not self._match_text_seq("AT", "TIME", "ZONE"):
            return this
        return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary())

    def _parse_column(self) -> t.Optional[exp.Expression]:
        """Parse a column reference, then any trailing column operators/brackets."""
        this = self._parse_field()
        if isinstance(this, exp.Identifier):
            this = self.expression(exp.Column, this=this)
        elif not this:
            return self._parse_bracket(this)
        return self._parse_column_ops(this)

    def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Apply trailing column operators to `this`: brackets, ::casts, dots, etc."""
        this = self._parse_bracket(this)

        while self._match_set(self.COLUMN_OPERATORS):
            op_token = self._prev.token_type
            op = self.COLUMN_OPERATORS.get(op_token)

            if op_token == TokenType.DCOLON:
                # Postgres-style cast: expr::type
                field = self._parse_types()
                if not field:
                    self.raise_error("Expected type")
            elif op and self._curr:
                # Operator with a literal right-hand side (e.g. JSON path operators).
                self._advance()
                value = self._prev.text
                field = (
                    exp.Literal.number(value)
                    if self._prev.token_type == TokenType.NUMBER
                    else exp.Literal.string(value)
                )
            else:
                field = self._parse_field(anonymous_func=True, any_token=True)

            if isinstance(field, exp.Func):
                # bigquery allows function calls like x.y.count(...)
                # SAFE.SUBSTR(...)
                # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules
                this = self._replace_columns_with_dots(this)

            if op:
                this = op(self, this, field)
            elif isinstance(this, exp.Column) and not this.args.get("catalog"):
                # Shift qualifiers: a.b.c -> column c with table b, db a.
                this = self.expression(
                    exp.Column,
                    this=field,
                    table=this.this,
                    db=this.args.get("table"),
                    catalog=this.args.get("db"),
                )
            else:
                this = self.expression(exp.Dot, this=this, expression=field)
            this = self._parse_bracket(this)
        return this
    def _parse_primary(self) -> t.Optional[exp.Expression]:
        """Parse a primary expression: literal, implicit string concat, .5-style
        number, or a parenthesized expression / subquery / tuple."""
        if self._match_set(self.PRIMARY_PARSERS):
            token_type = self._prev.token_type
            primary = self.PRIMARY_PARSERS[token_type](self, self._prev)

            if token_type == TokenType.STRING:
                # Adjacent string literals concatenate (SQL standard behavior).
                expressions = [primary]
                while self._match(TokenType.STRING):
                    expressions.append(exp.Literal.string(self._prev.text))

                if len(expressions) > 1:
                    return self.expression(exp.Concat, expressions=expressions)

            return primary

        if self._match_pair(TokenType.DOT, TokenType.NUMBER):
            # Leading-dot decimal literal, e.g. `.25`.
            return exp.Literal.number(f"0.{self._prev.text}")

        if self._match(TokenType.L_PAREN):
            comments = self._prev_comments
            query = self._parse_select()

            if query:
                expressions = [query]
            else:
                expressions = self._parse_expressions()

            this = self._parse_query_modifiers(seq_get(expressions, 0))

            if isinstance(this, exp.Subqueryable):
                this = self._parse_set_operations(
                    self._parse_subquery(this=this, parse_alias=False)
                )
            elif len(expressions) > 1:
                this = self.expression(exp.Tuple, expressions=expressions)
            else:
                this = self.expression(exp.Paren, this=self._parse_set_operations(this))

            if this:
                this.add_comments(comments)

            self._match_r_paren(expression=this)
            return this

        return None

    def _parse_field(
        self,
        any_token: bool = False,
        tokens: t.Optional[t.Collection[TokenType]] = None,
        anonymous_func: bool = False,
    ) -> t.Optional[exp.Expression]:
        """Parse a field: a primary expression, a function call, or an identifier."""
        return (
            self._parse_primary()
            or self._parse_function(anonymous=anonymous_func)
            or self._parse_id_var(any_token=any_token, tokens=tokens)
        )

    def _parse_function(
        self,
        functions: t.Optional[t.Dict[str, t.Callable]] = None,
        anonymous: bool = False,
        optional_parens: bool = True,
    ) -> t.Optional[exp.Expression]:
        """Parse a function call.

        Args:
            functions: name -> builder mapping to use (defaults to self.FUNCTIONS).
            anonymous: when True, always build an exp.Anonymous instead of a
                known function expression.
            optional_parens: when True, also accept no-paren functions such as
                CURRENT_DATE.
        """
        if not self._curr:
            return None

        token_type = self._curr.token_type

        if optional_parens and self._match_set(self.NO_PAREN_FUNCTION_PARSERS):
            return self.NO_PAREN_FUNCTION_PARSERS[token_type](self)

        if not self._next or self._next.token_type != TokenType.L_PAREN:
            if optional_parens and token_type in self.NO_PAREN_FUNCTIONS:
                self._advance()
                return self.expression(self.NO_PAREN_FUNCTIONS[token_type])

            return None

        if token_type not in self.FUNC_TOKENS:
            return None

        this = self._curr.text
        upper = this.upper()
        # Skip past the function name and the opening parenthesis.
        self._advance(2)

        parser = self.FUNCTION_PARSERS.get(upper)

        if parser and not anonymous:
            this = parser(self)
        else:
            subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type)

            if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH):
                # e.g. EXISTS(SELECT ...) / ANY(SELECT ...).
                this = self.expression(subquery_predicate, this=self._parse_select())
                self._match_r_paren()
                return this

            if functions is None:
                functions = self.FUNCTIONS

            function = functions.get(upper)

            alias = upper in self.FUNCTIONS_WITH_ALIASED_ARGS
            args = self._parse_csv(lambda: self._parse_lambda(alias=alias))

            if function and not anonymous:
                # Known function: build its expression and validate arity.
                this = self.validate_expression(function(args), args)
            else:
                this = self.expression(exp.Anonymous, this=this, expressions=args)

        self._match(TokenType.R_PAREN, expression=this)
        return self._parse_window(this)

    def _parse_function_parameter(self) -> t.Optional[exp.Expression]:
        """Parse one parameter of a function definition (name plus optional type)."""
        return self._parse_column_def(self._parse_id_var())

    def _parse_user_defined_function(
        self, kind: t.Optional[TokenType] = None
    ) -> t.Optional[exp.Expression]:
        """Parse a possibly dotted UDF name and, if present, its parameter list."""
        this = self._parse_id_var()

        while self._match(TokenType.DOT):
            this = self.expression(exp.Dot, this=this, expression=self._parse_id_var())

        if not self._match(TokenType.L_PAREN):
            return this

        expressions = self._parse_csv(self._parse_function_parameter)
        self._match_r_paren()
        return self.expression(
            exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True
        )

    def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier:
        """Parse a charset introducer (e.g. _utf8'abc'); plain identifier otherwise."""
        literal = self._parse_primary()
        if literal:
            return self.expression(exp.Introducer, this=token.text, expression=literal)

        return self.expression(exp.Identifier, this=token.text)

    def _parse_session_parameter(self) -> exp.SessionParameter:
        """Parse a session parameter reference, optionally qualified with a kind."""
        kind = None
        this = self._parse_id_var() or self._parse_primary()

        if this and self._match(TokenType.DOT):
            kind = this.name
            this = self._parse_var() or self._parse_primary()

        return self.expression(exp.SessionParameter, this=this, kind=kind)

    def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]:
        """Parse a lambda expression like (x, y) -> ..., or fall back to a
        DISTINCT list / ordinary expression (used for function arguments)."""
        index = self._index

        if self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(self._parse_id_var)

            if not self._match(TokenType.R_PAREN):
                self._retreat(index)
        else:
            expressions = [self._parse_id_var()]

        if self._match_set(self.LAMBDAS):
            return self.LAMBDAS[self._prev.token_type](self, expressions)

        # Not a lambda -- rewind and parse a regular argument instead.
        self._retreat(index)

        this: t.Optional[exp.Expression]

        if self._match(TokenType.DISTINCT):
            this = self.expression(
                exp.Distinct, expressions=self._parse_csv(self._parse_conjunction)
            )
        else:
            this = self._parse_select_or_expression(alias=alias)

        return self._parse_limit(self._parse_order(self._parse_respect_or_ignore_nulls(this)))
    def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        """Parse a parenthesized schema: column defs and/or constraints.

        First speculatively tries a nested SELECT (e.g. CREATE TABLE t AS (SELECT ...))
        and rewinds if that path produces errors.
        """
        index = self._index

        if not self.errors:
            try:
                if self._parse_select(nested=True):
                    return this
            except ParseError:
                pass
            finally:
                # Either way, discard speculative errors and rewind.
                self.errors.clear()
                self._retreat(index)

        if not self._match(TokenType.L_PAREN):
            return this

        args = self._parse_csv(
            lambda: self._parse_constraint()
            or self._parse_column_def(self._parse_field(any_token=True))
        )

        self._match_r_paren()
        return self.expression(exp.Schema, this=this, expressions=args)

    def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse a column definition: name, optional type, and constraints."""
        # column defs are not really columns, they're identifiers
        if isinstance(this, exp.Column):
            this = this.this

        kind = self._parse_types(schema=True)

        if self._match_text_seq("FOR", "ORDINALITY"):
            return self.expression(exp.ColumnDef, this=this, ordinality=True)

        constraints = []
        while True:
            constraint = self._parse_column_constraint()
            if not constraint:
                break
            constraints.append(constraint)

        if not kind and not constraints:
            # Just a bare identifier -- not a real column definition.
            return this

        return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints)

    def _parse_auto_increment(
        self,
    ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint:
        """Parse AUTO_INCREMENT, optionally with (start, increment) or START/INCREMENT."""
        start = None
        increment = None

        if self._match(TokenType.L_PAREN, advance=False):
            args = self._parse_wrapped_csv(self._parse_bitwise)
            start = seq_get(args, 0)
            increment = seq_get(args, 1)
        elif self._match_text_seq("START"):
            start = self._parse_bitwise()
            self._match_text_seq("INCREMENT")
            increment = self._parse_bitwise()

        if start and increment:
            return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment)

        return exp.AutoIncrementColumnConstraint()

    def _parse_compress(self) -> exp.CompressColumnConstraint:
        """Parse a COMPRESS column constraint, with either a list or one expression."""
        if self._match(TokenType.L_PAREN, advance=False):
            return self.expression(
                exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise)
            )

        return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise())

    def _parse_generated_as_identity(self) -> exp.GeneratedAsIdentityColumnConstraint:
        """Parse GENERATED {ALWAYS | BY DEFAULT} AS IDENTITY (...options...)."""
        if self._match_text_seq("BY", "DEFAULT"):
            on_null = self._match_pair(TokenType.ON, TokenType.NULL)
            this = self.expression(
                exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null
            )
        else:
            self._match_text_seq("ALWAYS")
            this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True)

        self._match(TokenType.ALIAS)
        identity = self._match_text_seq("IDENTITY")

        if self._match(TokenType.L_PAREN):
            if self._match_text_seq("START", "WITH"):
                this.set("start", self._parse_bitwise())
            if self._match_text_seq("INCREMENT", "BY"):
                this.set("increment", self._parse_bitwise())
            if self._match_text_seq("MINVALUE"):
                this.set("minvalue", self._parse_bitwise())
            if self._match_text_seq("MAXVALUE"):
                this.set("maxvalue", self._parse_bitwise())

            if self._match_text_seq("CYCLE"):
                this.set("cycle", True)
            elif self._match_text_seq("NO", "CYCLE"):
                this.set("cycle", False)

            if not identity:
                # GENERATED ALWAYS AS (expr) -- a computed column, not an identity.
                this.set("expression", self._parse_bitwise())

            self._match_r_paren()

        return this

    def _parse_inline(self) -> exp.InlineLengthColumnConstraint:
        """Parse an INLINE [LENGTH] column constraint."""
        self._match_text_seq("LENGTH")
        return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise())

    def _parse_not_constraint(
        self,
    ) -> t.Optional[exp.NotNullColumnConstraint | exp.CaseSpecificColumnConstraint]:
        """Parse what follows NOT in a constraint: NULL or CASESPECIFIC."""
        if self._match_text_seq("NULL"):
            return self.expression(exp.NotNullColumnConstraint)
        if self._match_text_seq("CASESPECIFIC"):
            return self.expression(exp.CaseSpecificColumnConstraint, not_=True)
        return None

    def _parse_column_constraint(self) -> t.Optional[exp.Expression]:
        """Parse one column constraint, optionally named via CONSTRAINT <name>."""
        if self._match(TokenType.CONSTRAINT):
            this = self._parse_id_var()
        else:
            this = None

        if self._match_texts(self.CONSTRAINT_PARSERS):
            return self.expression(
                exp.ColumnConstraint,
                this=this,
                kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self),
            )

        return this

    def _parse_constraint(self) -> t.Optional[exp.Expression]:
        """Parse a table constraint; unnamed ones fall back to the schema set."""
        if not self._match(TokenType.CONSTRAINT):
            return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS)

        this = self._parse_id_var()
        expressions = []

        while True:
            constraint = self._parse_unnamed_constraint() or self._parse_function()
            if not constraint:
                break
            expressions.append(constraint)

        return self.expression(exp.Constraint, this=this, expressions=expressions)

    def _parse_unnamed_constraint(
        self, constraints: t.Optional[t.Collection[str]] = None
    ) -> t.Optional[exp.Expression]:
        """Dispatch to the parser for the constraint keyword at the current token."""
        if not self._match_texts(constraints or self.CONSTRAINT_PARSERS):
            return None

        constraint = self._prev.text.upper()
        if constraint not in self.CONSTRAINT_PARSERS:
            self.raise_error(f"No parser found for schema constraint {constraint}.")

        return self.CONSTRAINT_PARSERS[constraint](self)
    def _parse_unique(self) -> exp.UniqueColumnConstraint:
        """Parse UNIQUE [KEY] with an optional column list."""
        self._match_text_seq("KEY")
        return self.expression(
            exp.UniqueColumnConstraint, this=self._parse_schema(self._parse_id_var(any_token=False))
        )

    def _parse_key_constraint_options(self) -> t.List[str]:
        """Collect key-constraint options (ON DELETE/UPDATE actions etc.) as strings."""
        options = []
        while True:
            if not self._curr:
                break

            if self._match(TokenType.ON):
                action = None
                # The token right after ON is the event (e.g. DELETE / UPDATE).
                on = self._advance_any() and self._prev.text

                if self._match_text_seq("NO", "ACTION"):
                    action = "NO ACTION"
                elif self._match_text_seq("CASCADE"):
                    action = "CASCADE"
                elif self._match_pair(TokenType.SET, TokenType.NULL):
                    action = "SET NULL"
                elif self._match_pair(TokenType.SET, TokenType.DEFAULT):
                    action = "SET DEFAULT"
                else:
                    self.raise_error("Invalid key constraint")

                options.append(f"ON {on} {action}")
            elif self._match_text_seq("NOT", "ENFORCED"):
                options.append("NOT ENFORCED")
            elif self._match_text_seq("DEFERRABLE"):
                options.append("DEFERRABLE")
            elif self._match_text_seq("INITIALLY", "DEFERRED"):
                options.append("INITIALLY DEFERRED")
            elif self._match_text_seq("NORELY"):
                options.append("NORELY")
            elif self._match_text_seq("MATCH", "FULL"):
                options.append("MATCH FULL")
            else:
                break

        return options

    def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]:
        """Parse a REFERENCES clause: target table plus constraint options."""
        if match and not self._match(TokenType.REFERENCES):
            return None

        expressions = None
        this = self._parse_table(schema=True)
        options = self._parse_key_constraint_options()
        return self.expression(exp.Reference, this=this, expressions=expressions, options=options)

    def _parse_foreign_key(self) -> exp.ForeignKey:
        """Parse a FOREIGN KEY constraint with its REFERENCES and ON actions."""
        expressions = self._parse_wrapped_id_vars()
        reference = self._parse_references()
        options = {}

        while self._match(TokenType.ON):
            if not self._match_set((TokenType.DELETE, TokenType.UPDATE)):
                self.raise_error("Expected DELETE or UPDATE")

            kind = self._prev.text.lower()

            if self._match_text_seq("NO", "ACTION"):
                action = "NO ACTION"
            elif self._match(TokenType.SET):
                self._match_set((TokenType.NULL, TokenType.DEFAULT))
                action = "SET " + self._prev.text.upper()
            else:
                self._advance()
                action = self._prev.text.upper()

            options[kind] = action

        return self.expression(
            exp.ForeignKey, expressions=expressions, reference=reference, **options  # type: ignore
        )

    def _parse_primary_key(
        self, wrapped_optional: bool = False, in_props: bool = False
    ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey:
        """Parse PRIMARY KEY, either as a column constraint or a table constraint."""
        desc = (
            self._match_set((TokenType.ASC, TokenType.DESC))
            and self._prev.token_type == TokenType.DESC
        )

        if not in_props and not self._match(TokenType.L_PAREN, advance=False):
            # No column list -> a column-level PRIMARY KEY constraint.
            return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc)

        expressions = self._parse_wrapped_csv(self._parse_field, optional=wrapped_optional)
        options = self._parse_key_constraint_options()
        return self.expression(exp.PrimaryKey, expressions=expressions, options=options)

    def _parse_bracket(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse a bracketed suffix: array literal, index/slice, or {} struct."""
        if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)):
            return this

        bracket_kind = self._prev.token_type

        if self._match(TokenType.COLON):
            # Slice with an empty lower bound, e.g. x[:3].
            expressions: t.List[t.Optional[exp.Expression]] = [
                self.expression(exp.Slice, expression=self._parse_conjunction())
            ]
        else:
            expressions = self._parse_csv(lambda: self._parse_slice(self._parse_conjunction()))

        # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs
        if bracket_kind == TokenType.L_BRACE:
            this = self.expression(exp.Struct, expressions=expressions)
        elif not this or this.name.upper() == "ARRAY":
            this = self.expression(exp.Array, expressions=expressions)
        else:
            # Indexing: normalize indexes for the dialect's INDEX_OFFSET.
            expressions = apply_index_offset(this, expressions, -self.INDEX_OFFSET)
            this = self.expression(exp.Bracket, this=this, expressions=expressions)

        if not self._match(TokenType.R_BRACKET) and bracket_kind == TokenType.L_BRACKET:
            self.raise_error("Expected ]")
        elif not self._match(TokenType.R_BRACE) and bracket_kind == TokenType.L_BRACE:
            self.raise_error("Expected }")

        self._add_comments(this)
        # Brackets can chain, e.g. x[0][1].
        return self._parse_bracket(this)

    def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Turn `this` into a slice if a colon follows (e.g. x[1:3])."""
        if self._match(TokenType.COLON):
            return self.expression(exp.Slice, this=this, expression=self._parse_conjunction())
        return this

    def _parse_case(self) -> t.Optional[exp.Expression]:
        """Parse a CASE expression (simple or searched) up to END."""
        ifs = []
        default = None

        # Present only for the "simple" form: CASE <operand> WHEN ...
        expression = self._parse_conjunction()

        while self._match(TokenType.WHEN):
            this = self._parse_conjunction()
            self._match(TokenType.THEN)
            then = self._parse_conjunction()
            ifs.append(self.expression(exp.If, this=this, true=then))

        if self._match(TokenType.ELSE):
            default = self._parse_conjunction()

        if not self._match(TokenType.END):
            self.raise_error("Expected END after CASE", self._prev)

        return self._parse_window(
            self.expression(exp.Case, this=expression, ifs=ifs, default=default)
        )

    def _parse_if(self) -> t.Optional[exp.Expression]:
        """Parse IF, either function-style IF(...) or statement-style IF..THEN..END."""
        if self._match(TokenType.L_PAREN):
            args = self._parse_csv(self._parse_conjunction)
            this = self.validate_expression(exp.If.from_arg_list(args), args)
            self._match_r_paren()
        else:
            index = self._index - 1
            condition = self._parse_conjunction()

            if not condition:
                # Not an IF expression -- rewind past the IF token.
                self._retreat(index)
                return None

            self._match(TokenType.THEN)
            true = self._parse_conjunction()
            false = self._parse_conjunction() if self._match(TokenType.ELSE) else None
            self._match(TokenType.END)
            this = self.expression(exp.If, this=condition, true=true, false=false)

        return self._parse_window(this)
    def _parse_extract(self) -> exp.Extract:
        """Parse EXTRACT arguments: `<part> FROM <expr>` or `<part>, <expr>`."""
        this = self._parse_function() or self._parse_var() or self._parse_type()

        if self._match(TokenType.FROM):
            return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

        if not self._match(TokenType.COMMA):
            self.raise_error("Expected FROM or comma after EXTRACT", self._prev)

        return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

    def _parse_any_value(self) -> exp.AnyValue:
        """Parse ANY_VALUE arguments, including the HAVING MAX/MIN modifier."""
        this = self._parse_lambda()
        is_max = None
        having = None

        if self._match(TokenType.HAVING):
            self._match_texts(("MAX", "MIN"))
            is_max = self._prev.text == "MAX"
            having = self._parse_column()

        return self.expression(exp.AnyValue, this=this, having=having, max=is_max)

    def _parse_cast(self, strict: bool) -> exp.Expression:
        """Parse CAST arguments: `expr AS type [FORMAT fmt]`.

        Args:
            strict: build exp.Cast when True, exp.TryCast when False.
        """
        this = self._parse_conjunction()

        if not self._match(TokenType.ALIAS):
            if self._match(TokenType.COMMA):
                # Snowflake-style CAST(expr, 'type string').
                return self.expression(
                    exp.CastToStrType, this=this, expression=self._parse_string()
                )
            else:
                self.raise_error("Expected AS after CAST")

        fmt = None
        to = self._parse_types()

        if not to:
            self.raise_error("Expected TYPE after CAST")
        elif to.this == exp.DataType.Type.CHAR:
            if self._match(TokenType.CHARACTER_SET):
                to = self.expression(exp.CharacterSet, this=self._parse_var_or_string())
        elif self._match(TokenType.FORMAT):
            fmt_string = self._parse_string()
            fmt = self._parse_at_time_zone(fmt_string)

            if to.this in exp.DataType.TEMPORAL_TYPES:
                # CAST with FORMAT to a temporal type becomes STR_TO_DATE/STR_TO_TIME
                # so the format can be transpiled between dialects.
                this = self.expression(
                    exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime,
                    this=this,
                    format=exp.Literal.string(
                        format_time(
                            fmt_string.this if fmt_string else "",
                            self.FORMAT_MAPPING or self.TIME_MAPPING,
                            self.FORMAT_TRIE or self.TIME_TRIE,
                        )
                    ),
                )

                if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime):
                    this.set("zone", fmt.args["zone"])

                return this

        return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, format=fmt)

    def _parse_concat(self) -> t.Optional[exp.Expression]:
        """Parse CONCAT arguments, coalescing NULLs when the dialect requires it."""
        args = self._parse_csv(self._parse_conjunction)
        if self.CONCAT_NULL_OUTPUTS_STRING:
            # Dialects where CONCAT treats NULL as '' need an explicit COALESCE.
            args = [
                exp.func("COALESCE", exp.cast(arg, "text"), exp.Literal.string(""))
                for arg in args
                if arg
            ]

        # Some dialects (e.g. Trino) don't allow a single-argument CONCAT call, so when
        # we find such a call we replace it with its argument.
        if len(args) == 1:
            return args[0]

        return self.expression(
            exp.Concat if self.STRICT_STRING_CONCAT else exp.SafeConcat, expressions=args
        )

    def _parse_string_agg(self) -> exp.Expression:
        """Parse STRING_AGG / GROUP_CONCAT arguments, including WITHIN GROUP."""
        if self._match(TokenType.DISTINCT):
            args: t.List[t.Optional[exp.Expression]] = [
                self.expression(exp.Distinct, expressions=[self._parse_conjunction()])
            ]
            if self._match(TokenType.COMMA):
                args.extend(self._parse_csv(self._parse_conjunction))
        else:
            args = self._parse_csv(self._parse_conjunction)

        index = self._index
        if not self._match(TokenType.R_PAREN) and args:
            # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]])
            # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n])
            args[-1] = self._parse_limit(this=self._parse_order(this=args[-1]))
            return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1))

        # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]).
        # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that
        # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them.
        if not self._match_text_seq("WITHIN", "GROUP"):
            self._retreat(index)
            return self.validate_expression(exp.GroupConcat.from_arg_list(args), args)

        self._match_l_paren()  # The corresponding match_r_paren will be called in parse_function (caller)
        order = self._parse_order(this=seq_get(args, 0))
        return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1))

    def _parse_convert(self, strict: bool) -> t.Optional[exp.Expression]:
        """Parse CONVERT arguments: `expr USING charset` or `expr, type`."""
        this = self._parse_bitwise()

        if self._match(TokenType.USING):
            to: t.Optional[exp.Expression] = self.expression(
                exp.CharacterSet, this=self._parse_var()
            )
        elif self._match(TokenType.COMMA):
            to = self._parse_types()
        else:
            to = None

        return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to)

    def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]:
        """
        There are generally two variants of the DECODE function:

        - DECODE(bin, charset)
        - DECODE(expression, search, result [, search, result] ... [, default])

        The second variant will always be parsed into a CASE expression. Note that NULL
        needs special treatment, since we need to explicitly check for it with `IS NULL`,
        instead of relying on pattern matching.
        """
        args = self._parse_csv(self._parse_conjunction)

        if len(args) < 3:
            return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1))

        expression, *expressions = args
        if not expression:
            return None

        ifs = []
        # Walk (search, result) pairs; a trailing unpaired arg is the default.
        for search, result in zip(expressions[::2], expressions[1::2]):
            if not search or not result:
                return None

            if isinstance(search, exp.Literal):
                ifs.append(
                    exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result)
                )
            elif isinstance(search, exp.Null):
                ifs.append(
                    exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result)
                )
            else:
                # Non-literal search: match on equality OR both sides being NULL.
                cond = exp.or_(
                    exp.EQ(this=expression.copy(), expression=search),
                    exp.and_(
                        exp.Is(this=expression.copy(), expression=exp.Null()),
                        exp.Is(this=search.copy(), expression=exp.Null()),
                        copy=False,
                    ),
                    copy=False,
                )
                ifs.append(exp.If(this=cond, true=result))

        return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None)
3905 """ 3906 args = self._parse_csv(self._parse_conjunction) 3907 3908 if len(args) < 3: 3909 return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1)) 3910 3911 expression, *expressions = args 3912 if not expression: 3913 return None 3914 3915 ifs = [] 3916 for search, result in zip(expressions[::2], expressions[1::2]): 3917 if not search or not result: 3918 return None 3919 3920 if isinstance(search, exp.Literal): 3921 ifs.append( 3922 exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result) 3923 ) 3924 elif isinstance(search, exp.Null): 3925 ifs.append( 3926 exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result) 3927 ) 3928 else: 3929 cond = exp.or_( 3930 exp.EQ(this=expression.copy(), expression=search), 3931 exp.and_( 3932 exp.Is(this=expression.copy(), expression=exp.Null()), 3933 exp.Is(this=search.copy(), expression=exp.Null()), 3934 copy=False, 3935 ), 3936 copy=False, 3937 ) 3938 ifs.append(exp.If(this=cond, true=result)) 3939 3940 return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None) 3941 3942 def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]: 3943 self._match_text_seq("KEY") 3944 key = self._parse_field() 3945 self._match(TokenType.COLON) 3946 self._match_text_seq("VALUE") 3947 value = self._parse_field() 3948 3949 if not key and not value: 3950 return None 3951 return self.expression(exp.JSONKeyValue, this=key, expression=value) 3952 3953 def _parse_json_object(self) -> exp.JSONObject: 3954 star = self._parse_star() 3955 expressions = [star] if star else self._parse_csv(self._parse_json_key_value) 3956 3957 null_handling = None 3958 if self._match_text_seq("NULL", "ON", "NULL"): 3959 null_handling = "NULL ON NULL" 3960 elif self._match_text_seq("ABSENT", "ON", "NULL"): 3961 null_handling = "ABSENT ON NULL" 3962 3963 unique_keys = None 3964 if self._match_text_seq("WITH", "UNIQUE"): 3965 unique_keys = True 3966 elif 
self._match_text_seq("WITHOUT", "UNIQUE"): 3967 unique_keys = False 3968 3969 self._match_text_seq("KEYS") 3970 3971 return_type = self._match_text_seq("RETURNING") and self._parse_type() 3972 format_json = self._match_text_seq("FORMAT", "JSON") 3973 encoding = self._match_text_seq("ENCODING") and self._parse_var() 3974 3975 return self.expression( 3976 exp.JSONObject, 3977 expressions=expressions, 3978 null_handling=null_handling, 3979 unique_keys=unique_keys, 3980 return_type=return_type, 3981 format_json=format_json, 3982 encoding=encoding, 3983 ) 3984 3985 def _parse_logarithm(self) -> exp.Func: 3986 # Default argument order is base, expression 3987 args = self._parse_csv(self._parse_range) 3988 3989 if len(args) > 1: 3990 if not self.LOG_BASE_FIRST: 3991 args.reverse() 3992 return exp.Log.from_arg_list(args) 3993 3994 return self.expression( 3995 exp.Ln if self.LOG_DEFAULTS_TO_LN else exp.Log, this=seq_get(args, 0) 3996 ) 3997 3998 def _parse_match_against(self) -> exp.MatchAgainst: 3999 expressions = self._parse_csv(self._parse_column) 4000 4001 self._match_text_seq(")", "AGAINST", "(") 4002 4003 this = self._parse_string() 4004 4005 if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"): 4006 modifier = "IN NATURAL LANGUAGE MODE" 4007 if self._match_text_seq("WITH", "QUERY", "EXPANSION"): 4008 modifier = f"{modifier} WITH QUERY EXPANSION" 4009 elif self._match_text_seq("IN", "BOOLEAN", "MODE"): 4010 modifier = "IN BOOLEAN MODE" 4011 elif self._match_text_seq("WITH", "QUERY", "EXPANSION"): 4012 modifier = "WITH QUERY EXPANSION" 4013 else: 4014 modifier = None 4015 4016 return self.expression( 4017 exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier 4018 ) 4019 4020 # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16 4021 def _parse_open_json(self) -> exp.OpenJSON: 4022 this = self._parse_bitwise() 4023 path = self._match(TokenType.COMMA) and self._parse_string() 4024 4025 def 
_parse_open_json_column_def() -> exp.OpenJSONColumnDef: 4026 this = self._parse_field(any_token=True) 4027 kind = self._parse_types() 4028 path = self._parse_string() 4029 as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON) 4030 4031 return self.expression( 4032 exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json 4033 ) 4034 4035 expressions = None 4036 if self._match_pair(TokenType.R_PAREN, TokenType.WITH): 4037 self._match_l_paren() 4038 expressions = self._parse_csv(_parse_open_json_column_def) 4039 4040 return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions) 4041 4042 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 4043 args = self._parse_csv(self._parse_bitwise) 4044 4045 if self._match(TokenType.IN): 4046 return self.expression( 4047 exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0) 4048 ) 4049 4050 if haystack_first: 4051 haystack = seq_get(args, 0) 4052 needle = seq_get(args, 1) 4053 else: 4054 needle = seq_get(args, 0) 4055 haystack = seq_get(args, 1) 4056 4057 return self.expression( 4058 exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2) 4059 ) 4060 4061 def _parse_join_hint(self, func_name: str) -> exp.JoinHint: 4062 args = self._parse_csv(self._parse_table) 4063 return exp.JoinHint(this=func_name.upper(), expressions=args) 4064 4065 def _parse_substring(self) -> exp.Substring: 4066 # Postgres supports the form: substring(string [from int] [for int]) 4067 # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6 4068 4069 args = self._parse_csv(self._parse_bitwise) 4070 4071 if self._match(TokenType.FROM): 4072 args.append(self._parse_bitwise()) 4073 if self._match(TokenType.FOR): 4074 args.append(self._parse_bitwise()) 4075 4076 return self.validate_expression(exp.Substring.from_arg_list(args), args) 4077 4078 def _parse_trim(self) -> exp.Trim: 4079 # https://www.w3resource.com/sql/character-functions/trim.php 4080 
# https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html 4081 4082 position = None 4083 collation = None 4084 4085 if self._match_texts(self.TRIM_TYPES): 4086 position = self._prev.text.upper() 4087 4088 expression = self._parse_bitwise() 4089 if self._match_set((TokenType.FROM, TokenType.COMMA)): 4090 this = self._parse_bitwise() 4091 else: 4092 this = expression 4093 expression = None 4094 4095 if self._match(TokenType.COLLATE): 4096 collation = self._parse_bitwise() 4097 4098 return self.expression( 4099 exp.Trim, this=this, position=position, expression=expression, collation=collation 4100 ) 4101 4102 def _parse_window_clause(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]: 4103 return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window) 4104 4105 def _parse_named_window(self) -> t.Optional[exp.Expression]: 4106 return self._parse_window(self._parse_id_var(), alias=True) 4107 4108 def _parse_respect_or_ignore_nulls( 4109 self, this: t.Optional[exp.Expression] 4110 ) -> t.Optional[exp.Expression]: 4111 if self._match_text_seq("IGNORE", "NULLS"): 4112 return self.expression(exp.IgnoreNulls, this=this) 4113 if self._match_text_seq("RESPECT", "NULLS"): 4114 return self.expression(exp.RespectNulls, this=this) 4115 return this 4116 4117 def _parse_window( 4118 self, this: t.Optional[exp.Expression], alias: bool = False 4119 ) -> t.Optional[exp.Expression]: 4120 if self._match_pair(TokenType.FILTER, TokenType.L_PAREN): 4121 self._match(TokenType.WHERE) 4122 this = self.expression( 4123 exp.Filter, this=this, expression=self._parse_where(skip_where_token=True) 4124 ) 4125 self._match_r_paren() 4126 4127 # T-SQL allows the OVER (...) syntax after WITHIN GROUP. 
4128 # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16 4129 if self._match_text_seq("WITHIN", "GROUP"): 4130 order = self._parse_wrapped(self._parse_order) 4131 this = self.expression(exp.WithinGroup, this=this, expression=order) 4132 4133 # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER 4134 # Some dialects choose to implement and some do not. 4135 # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html 4136 4137 # There is some code above in _parse_lambda that handles 4138 # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ... 4139 4140 # The below changes handle 4141 # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ... 4142 4143 # Oracle allows both formats 4144 # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html) 4145 # and Snowflake chose to do the same for familiarity 4146 # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes 4147 this = self._parse_respect_or_ignore_nulls(this) 4148 4149 # bigquery select from window x AS (partition by ...) 
4150 if alias: 4151 over = None 4152 self._match(TokenType.ALIAS) 4153 elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS): 4154 return this 4155 else: 4156 over = self._prev.text.upper() 4157 4158 if not self._match(TokenType.L_PAREN): 4159 return self.expression( 4160 exp.Window, this=this, alias=self._parse_id_var(False), over=over 4161 ) 4162 4163 window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS) 4164 4165 first = self._match(TokenType.FIRST) 4166 if self._match_text_seq("LAST"): 4167 first = False 4168 4169 partition = self._parse_partition_by() 4170 order = self._parse_order() 4171 kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text 4172 4173 if kind: 4174 self._match(TokenType.BETWEEN) 4175 start = self._parse_window_spec() 4176 self._match(TokenType.AND) 4177 end = self._parse_window_spec() 4178 4179 spec = self.expression( 4180 exp.WindowSpec, 4181 kind=kind, 4182 start=start["value"], 4183 start_side=start["side"], 4184 end=end["value"], 4185 end_side=end["side"], 4186 ) 4187 else: 4188 spec = None 4189 4190 self._match_r_paren() 4191 4192 window = self.expression( 4193 exp.Window, 4194 this=this, 4195 partition_by=partition, 4196 order=order, 4197 spec=spec, 4198 alias=window_alias, 4199 over=over, 4200 first=first, 4201 ) 4202 4203 # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...) 
4204 if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False): 4205 return self._parse_window(window, alias=alias) 4206 4207 return window 4208 4209 def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]: 4210 self._match(TokenType.BETWEEN) 4211 4212 return { 4213 "value": ( 4214 (self._match_text_seq("UNBOUNDED") and "UNBOUNDED") 4215 or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW") 4216 or self._parse_bitwise() 4217 ), 4218 "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text, 4219 } 4220 4221 def _parse_alias( 4222 self, this: t.Optional[exp.Expression], explicit: bool = False 4223 ) -> t.Optional[exp.Expression]: 4224 any_token = self._match(TokenType.ALIAS) 4225 4226 if explicit and not any_token: 4227 return this 4228 4229 if self._match(TokenType.L_PAREN): 4230 aliases = self.expression( 4231 exp.Aliases, 4232 this=this, 4233 expressions=self._parse_csv(lambda: self._parse_id_var(any_token)), 4234 ) 4235 self._match_r_paren(aliases) 4236 return aliases 4237 4238 alias = self._parse_id_var(any_token) 4239 4240 if alias: 4241 return self.expression(exp.Alias, this=this, alias=alias) 4242 4243 return this 4244 4245 def _parse_id_var( 4246 self, 4247 any_token: bool = True, 4248 tokens: t.Optional[t.Collection[TokenType]] = None, 4249 ) -> t.Optional[exp.Expression]: 4250 identifier = self._parse_identifier() 4251 4252 if identifier: 4253 return identifier 4254 4255 if (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS): 4256 quoted = self._prev.token_type == TokenType.STRING 4257 return exp.Identifier(this=self._prev.text, quoted=quoted) 4258 4259 return None 4260 4261 def _parse_string(self) -> t.Optional[exp.Expression]: 4262 if self._match(TokenType.STRING): 4263 return self.PRIMARY_PARSERS[TokenType.STRING](self, self._prev) 4264 return self._parse_placeholder() 4265 4266 def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]: 4267 return 
exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True) 4268 4269 def _parse_number(self) -> t.Optional[exp.Expression]: 4270 if self._match(TokenType.NUMBER): 4271 return self.PRIMARY_PARSERS[TokenType.NUMBER](self, self._prev) 4272 return self._parse_placeholder() 4273 4274 def _parse_identifier(self) -> t.Optional[exp.Expression]: 4275 if self._match(TokenType.IDENTIFIER): 4276 return self.expression(exp.Identifier, this=self._prev.text, quoted=True) 4277 return self._parse_placeholder() 4278 4279 def _parse_var( 4280 self, any_token: bool = False, tokens: t.Optional[t.Collection[TokenType]] = None 4281 ) -> t.Optional[exp.Expression]: 4282 if ( 4283 (any_token and self._advance_any()) 4284 or self._match(TokenType.VAR) 4285 or (self._match_set(tokens) if tokens else False) 4286 ): 4287 return self.expression(exp.Var, this=self._prev.text) 4288 return self._parse_placeholder() 4289 4290 def _advance_any(self) -> t.Optional[Token]: 4291 if self._curr and self._curr.token_type not in self.RESERVED_KEYWORDS: 4292 self._advance() 4293 return self._prev 4294 return None 4295 4296 def _parse_var_or_string(self) -> t.Optional[exp.Expression]: 4297 return self._parse_var() or self._parse_string() 4298 4299 def _parse_null(self) -> t.Optional[exp.Expression]: 4300 if self._match(TokenType.NULL): 4301 return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev) 4302 return None 4303 4304 def _parse_boolean(self) -> t.Optional[exp.Expression]: 4305 if self._match(TokenType.TRUE): 4306 return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev) 4307 if self._match(TokenType.FALSE): 4308 return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev) 4309 return None 4310 4311 def _parse_star(self) -> t.Optional[exp.Expression]: 4312 if self._match(TokenType.STAR): 4313 return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev) 4314 return None 4315 4316 def _parse_parameter(self) -> exp.Parameter: 4317 wrapped = self._match(TokenType.L_BRACE) 
4318 this = self._parse_var() or self._parse_identifier() or self._parse_primary() 4319 self._match(TokenType.R_BRACE) 4320 return self.expression(exp.Parameter, this=this, wrapped=wrapped) 4321 4322 def _parse_placeholder(self) -> t.Optional[exp.Expression]: 4323 if self._match_set(self.PLACEHOLDER_PARSERS): 4324 placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self) 4325 if placeholder: 4326 return placeholder 4327 self._advance(-1) 4328 return None 4329 4330 def _parse_except(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]: 4331 if not self._match(TokenType.EXCEPT): 4332 return None 4333 if self._match(TokenType.L_PAREN, advance=False): 4334 return self._parse_wrapped_csv(self._parse_column) 4335 return self._parse_csv(self._parse_column) 4336 4337 def _parse_replace(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]: 4338 if not self._match(TokenType.REPLACE): 4339 return None 4340 if self._match(TokenType.L_PAREN, advance=False): 4341 return self._parse_wrapped_csv(self._parse_expression) 4342 return self._parse_expressions() 4343 4344 def _parse_csv( 4345 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA 4346 ) -> t.List[t.Optional[exp.Expression]]: 4347 parse_result = parse_method() 4348 items = [parse_result] if parse_result is not None else [] 4349 4350 while self._match(sep): 4351 self._add_comments(parse_result) 4352 parse_result = parse_method() 4353 if parse_result is not None: 4354 items.append(parse_result) 4355 4356 return items 4357 4358 def _parse_tokens( 4359 self, parse_method: t.Callable, expressions: t.Dict 4360 ) -> t.Optional[exp.Expression]: 4361 this = parse_method() 4362 4363 while self._match_set(expressions): 4364 this = self.expression( 4365 expressions[self._prev.token_type], 4366 this=this, 4367 comments=self._prev_comments, 4368 expression=parse_method(), 4369 ) 4370 4371 return this 4372 4373 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[t.Optional[exp.Expression]]: 4374 
return self._parse_wrapped_csv(self._parse_id_var, optional=optional) 4375 4376 def _parse_wrapped_csv( 4377 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False 4378 ) -> t.List[t.Optional[exp.Expression]]: 4379 return self._parse_wrapped( 4380 lambda: self._parse_csv(parse_method, sep=sep), optional=optional 4381 ) 4382 4383 def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any: 4384 wrapped = self._match(TokenType.L_PAREN) 4385 if not wrapped and not optional: 4386 self.raise_error("Expecting (") 4387 parse_result = parse_method() 4388 if wrapped: 4389 self._match_r_paren() 4390 return parse_result 4391 4392 def _parse_expressions(self) -> t.List[t.Optional[exp.Expression]]: 4393 return self._parse_csv(self._parse_expression) 4394 4395 def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]: 4396 return self._parse_select() or self._parse_set_operations( 4397 self._parse_expression() if alias else self._parse_conjunction() 4398 ) 4399 4400 def _parse_ddl_select(self) -> t.Optional[exp.Expression]: 4401 return self._parse_query_modifiers( 4402 self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False)) 4403 ) 4404 4405 def _parse_transaction(self) -> exp.Transaction | exp.Command: 4406 this = None 4407 if self._match_texts(self.TRANSACTION_KIND): 4408 this = self._prev.text 4409 4410 self._match_texts({"TRANSACTION", "WORK"}) 4411 4412 modes = [] 4413 while True: 4414 mode = [] 4415 while self._match(TokenType.VAR): 4416 mode.append(self._prev.text) 4417 4418 if mode: 4419 modes.append(" ".join(mode)) 4420 if not self._match(TokenType.COMMA): 4421 break 4422 4423 return self.expression(exp.Transaction, this=this, modes=modes) 4424 4425 def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback: 4426 chain = None 4427 savepoint = None 4428 is_rollback = self._prev.token_type == TokenType.ROLLBACK 4429 4430 
self._match_texts({"TRANSACTION", "WORK"}) 4431 4432 if self._match_text_seq("TO"): 4433 self._match_text_seq("SAVEPOINT") 4434 savepoint = self._parse_id_var() 4435 4436 if self._match(TokenType.AND): 4437 chain = not self._match_text_seq("NO") 4438 self._match_text_seq("CHAIN") 4439 4440 if is_rollback: 4441 return self.expression(exp.Rollback, savepoint=savepoint) 4442 4443 return self.expression(exp.Commit, chain=chain) 4444 4445 def _parse_add_column(self) -> t.Optional[exp.Expression]: 4446 if not self._match_text_seq("ADD"): 4447 return None 4448 4449 self._match(TokenType.COLUMN) 4450 exists_column = self._parse_exists(not_=True) 4451 expression = self._parse_column_def(self._parse_field(any_token=True)) 4452 4453 if expression: 4454 expression.set("exists", exists_column) 4455 4456 # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns 4457 if self._match_texts(("FIRST", "AFTER")): 4458 position = self._prev.text 4459 column_position = self.expression( 4460 exp.ColumnPosition, this=self._parse_column(), position=position 4461 ) 4462 expression.set("position", column_position) 4463 4464 return expression 4465 4466 def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]: 4467 drop = self._match(TokenType.DROP) and self._parse_drop() 4468 if drop and not isinstance(drop, exp.Command): 4469 drop.set("kind", drop.args.get("kind", "COLUMN")) 4470 return drop 4471 4472 # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html 4473 def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition: 4474 return self.expression( 4475 exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists 4476 ) 4477 4478 def _parse_add_constraint(self) -> exp.AddConstraint: 4479 this = None 4480 kind = self._prev.token_type 4481 4482 if kind == TokenType.CONSTRAINT: 4483 this = self._parse_id_var() 4484 4485 if self._match_text_seq("CHECK"): 4486 expression = 
self._parse_wrapped(self._parse_conjunction) 4487 enforced = self._match_text_seq("ENFORCED") 4488 4489 return self.expression( 4490 exp.AddConstraint, this=this, expression=expression, enforced=enforced 4491 ) 4492 4493 if kind == TokenType.FOREIGN_KEY or self._match(TokenType.FOREIGN_KEY): 4494 expression = self._parse_foreign_key() 4495 elif kind == TokenType.PRIMARY_KEY or self._match(TokenType.PRIMARY_KEY): 4496 expression = self._parse_primary_key() 4497 else: 4498 expression = None 4499 4500 return self.expression(exp.AddConstraint, this=this, expression=expression) 4501 4502 def _parse_alter_table_add(self) -> t.List[t.Optional[exp.Expression]]: 4503 index = self._index - 1 4504 4505 if self._match_set(self.ADD_CONSTRAINT_TOKENS): 4506 return self._parse_csv(self._parse_add_constraint) 4507 4508 self._retreat(index) 4509 return self._parse_csv(self._parse_add_column) 4510 4511 def _parse_alter_table_alter(self) -> exp.AlterColumn: 4512 self._match(TokenType.COLUMN) 4513 column = self._parse_field(any_token=True) 4514 4515 if self._match_pair(TokenType.DROP, TokenType.DEFAULT): 4516 return self.expression(exp.AlterColumn, this=column, drop=True) 4517 if self._match_pair(TokenType.SET, TokenType.DEFAULT): 4518 return self.expression(exp.AlterColumn, this=column, default=self._parse_conjunction()) 4519 4520 self._match_text_seq("SET", "DATA") 4521 return self.expression( 4522 exp.AlterColumn, 4523 this=column, 4524 dtype=self._match_text_seq("TYPE") and self._parse_types(), 4525 collate=self._match(TokenType.COLLATE) and self._parse_term(), 4526 using=self._match(TokenType.USING) and self._parse_conjunction(), 4527 ) 4528 4529 def _parse_alter_table_drop(self) -> t.List[t.Optional[exp.Expression]]: 4530 index = self._index - 1 4531 4532 partition_exists = self._parse_exists() 4533 if self._match(TokenType.PARTITION, advance=False): 4534 return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists)) 4535 4536 self._retreat(index) 4537 
return self._parse_csv(self._parse_drop_column) 4538 4539 def _parse_alter_table_rename(self) -> exp.RenameTable: 4540 self._match_text_seq("TO") 4541 return self.expression(exp.RenameTable, this=self._parse_table(schema=True)) 4542 4543 def _parse_alter(self) -> exp.AlterTable | exp.Command: 4544 start = self._prev 4545 4546 if not self._match(TokenType.TABLE): 4547 return self._parse_as_command(start) 4548 4549 exists = self._parse_exists() 4550 this = self._parse_table(schema=True) 4551 4552 if self._next: 4553 self._advance() 4554 4555 parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None 4556 if parser: 4557 actions = ensure_list(parser(self)) 4558 4559 if not self._curr: 4560 return self.expression( 4561 exp.AlterTable, 4562 this=this, 4563 exists=exists, 4564 actions=actions, 4565 ) 4566 return self._parse_as_command(start) 4567 4568 def _parse_merge(self) -> exp.Merge: 4569 self._match(TokenType.INTO) 4570 target = self._parse_table() 4571 4572 self._match(TokenType.USING) 4573 using = self._parse_table() 4574 4575 self._match(TokenType.ON) 4576 on = self._parse_conjunction() 4577 4578 whens = [] 4579 while self._match(TokenType.WHEN): 4580 matched = not self._match(TokenType.NOT) 4581 self._match_text_seq("MATCHED") 4582 source = ( 4583 False 4584 if self._match_text_seq("BY", "TARGET") 4585 else self._match_text_seq("BY", "SOURCE") 4586 ) 4587 condition = self._parse_conjunction() if self._match(TokenType.AND) else None 4588 4589 self._match(TokenType.THEN) 4590 4591 if self._match(TokenType.INSERT): 4592 _this = self._parse_star() 4593 if _this: 4594 then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=_this) 4595 else: 4596 then = self.expression( 4597 exp.Insert, 4598 this=self._parse_value(), 4599 expression=self._match(TokenType.VALUES) and self._parse_value(), 4600 ) 4601 elif self._match(TokenType.UPDATE): 4602 expressions = self._parse_star() 4603 if expressions: 4604 then = self.expression(exp.Update, 
expressions=expressions) 4605 else: 4606 then = self.expression( 4607 exp.Update, 4608 expressions=self._match(TokenType.SET) 4609 and self._parse_csv(self._parse_equality), 4610 ) 4611 elif self._match(TokenType.DELETE): 4612 then = self.expression(exp.Var, this=self._prev.text) 4613 else: 4614 then = None 4615 4616 whens.append( 4617 self.expression( 4618 exp.When, 4619 matched=matched, 4620 source=source, 4621 condition=condition, 4622 then=then, 4623 ) 4624 ) 4625 4626 return self.expression( 4627 exp.Merge, 4628 this=target, 4629 using=using, 4630 on=on, 4631 expressions=whens, 4632 ) 4633 4634 def _parse_show(self) -> t.Optional[exp.Expression]: 4635 parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE) 4636 if parser: 4637 return parser(self) 4638 self._advance() 4639 return self.expression(exp.Show, this=self._prev.text.upper()) 4640 4641 def _parse_set_item_assignment( 4642 self, kind: t.Optional[str] = None 4643 ) -> t.Optional[exp.Expression]: 4644 index = self._index 4645 4646 if kind in {"GLOBAL", "SESSION"} and self._match_text_seq("TRANSACTION"): 4647 return self._parse_set_transaction(global_=kind == "GLOBAL") 4648 4649 left = self._parse_primary() or self._parse_id_var() 4650 4651 if not self._match_texts(("=", "TO")): 4652 self._retreat(index) 4653 return None 4654 4655 right = self._parse_statement() or self._parse_id_var() 4656 this = self.expression(exp.EQ, this=left, expression=right) 4657 4658 return self.expression(exp.SetItem, this=this, kind=kind) 4659 4660 def _parse_set_transaction(self, global_: bool = False) -> exp.Expression: 4661 self._match_text_seq("TRANSACTION") 4662 characteristics = self._parse_csv( 4663 lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS) 4664 ) 4665 return self.expression( 4666 exp.SetItem, 4667 expressions=characteristics, 4668 kind="TRANSACTION", 4669 **{"global": global_}, # type: ignore 4670 ) 4671 4672 def _parse_set_item(self) -> t.Optional[exp.Expression]: 4673 parser = 
self._find_parser(self.SET_PARSERS, self.SET_TRIE) 4674 return parser(self) if parser else self._parse_set_item_assignment(kind=None) 4675 4676 def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command: 4677 index = self._index 4678 set_ = self.expression( 4679 exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag 4680 ) 4681 4682 if self._curr: 4683 self._retreat(index) 4684 return self._parse_as_command(self._prev) 4685 4686 return set_ 4687 4688 def _parse_var_from_options(self, options: t.Collection[str]) -> t.Optional[exp.Var]: 4689 for option in options: 4690 if self._match_text_seq(*option.split(" ")): 4691 return exp.var(option) 4692 return None 4693 4694 def _parse_as_command(self, start: Token) -> exp.Command: 4695 while self._curr: 4696 self._advance() 4697 text = self._find_sql(start, self._prev) 4698 size = len(start.text) 4699 return exp.Command(this=text[:size], expression=text[size:]) 4700 4701 def _parse_dict_property(self, this: str) -> exp.DictProperty: 4702 settings = [] 4703 4704 self._match_l_paren() 4705 kind = self._parse_id_var() 4706 4707 if self._match(TokenType.L_PAREN): 4708 while True: 4709 key = self._parse_id_var() 4710 value = self._parse_primary() 4711 4712 if not key and value is None: 4713 break 4714 settings.append(self.expression(exp.DictSubProperty, this=key, value=value)) 4715 self._match(TokenType.R_PAREN) 4716 4717 self._match_r_paren() 4718 4719 return self.expression( 4720 exp.DictProperty, 4721 this=this, 4722 kind=kind.this if kind else None, 4723 settings=settings, 4724 ) 4725 4726 def _parse_dict_range(self, this: str) -> exp.DictRange: 4727 self._match_l_paren() 4728 has_min = self._match_text_seq("MIN") 4729 if has_min: 4730 min = self._parse_var() or self._parse_primary() 4731 self._match_text_seq("MAX") 4732 max = self._parse_var() or self._parse_primary() 4733 else: 4734 max = self._parse_var() or self._parse_primary() 4735 min = exp.Literal.number(0) 4736 
        # (continuation of _parse_dict_range — its opening statements precede this span)
        self._match_r_paren()
        return self.expression(exp.DictRange, this=this, min=min, max=max)

    def _find_parser(
        self, parsers: t.Dict[str, t.Callable], trie: t.Dict
    ) -> t.Optional[t.Callable]:
        """Find a (possibly multi-word) parser by consuming tokens against a trie.

        Tokens are consumed one at a time and their upper-cased text is fed into
        `trie`. As soon as the accumulated words form a complete key (the first
        complete match), the corresponding callable from `parsers` is returned.
        On failure the token cursor is restored to its starting position.

        Args:
            parsers: Mapping from space-joined keyword sequences to parser callables.
            trie: Prefix trie built from the keys of `parsers` (via `new_trie`).

        Returns:
            The matching parser callable, or None if nothing matched.
        """
        if not self._curr:
            return None

        index = self._index
        this = []
        while True:
            # The current token might be multiple words
            curr = self._curr.text.upper()
            key = curr.split(" ")
            this.append(curr)

            self._advance()
            result, trie = in_trie(trie, key)
            if result == TrieResult.FAILED:
                break

            if result == TrieResult.EXISTS:
                subparser = parsers[" ".join(this)]
                return subparser

        # No complete key was matched: rewind the cursor before giving up.
        self._retreat(index)
        return None

    def _match(
        self,
        token_type: TokenType,
        advance: bool = True,
        expression: t.Optional[exp.Expression] = None,
    ) -> t.Optional[bool]:
        """Return True (and advance, unless `advance=False`) if the current token
        has type `token_type`; otherwise return None (falsy) without advancing.

        If `expression` is given and the match succeeds, buffered comments are
        attached to it via `_add_comments` (defined elsewhere in this class).
        """
        if not self._curr:
            return None

        if self._curr.token_type == token_type:
            if advance:
                self._advance()
            self._add_comments(expression)
            return True

        return None

    def _match_set(
        self, types: t.Collection[TokenType], advance: bool = True
    ) -> t.Optional[bool]:
        """Like `_match`, but succeeds if the current token's type is in `types`.

        NOTE(review): unlike `_match`, no comments are attached on success.
        """
        if not self._curr:
            return None

        if self._curr.token_type in types:
            if advance:
                self._advance()
            return True

        return None

    def _match_pair(
        self, token_type_a: TokenType, token_type_b: TokenType, advance: bool = True
    ) -> t.Optional[bool]:
        """Match two consecutive tokens of the given types; advance past both on
        success (unless `advance=False`). Returns True or None (falsy)."""
        if not self._curr or not self._next:
            return None

        if self._curr.token_type == token_type_a and self._next.token_type == token_type_b:
            if advance:
                self._advance(2)
            return True

        return None

    def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None:
        """Require a '(' token; raise a parse error if it is missing."""
        if not self._match(TokenType.L_PAREN, expression=expression):
            self.raise_error("Expecting (")

    def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None:
        """Require a ')' token; raise a parse error if it is missing."""
        if not self._match(TokenType.R_PAREN, expression=expression):
            self.raise_error("Expecting )")

    def _match_texts(self, texts: t.Collection[str], advance: bool = True) -> bool:
        """Return True (and advance, unless `advance=False`) if the current
        token's upper-cased text is a member of `texts`."""
        if self._curr and self._curr.text.upper() in texts:
            if advance:
                self._advance()
            return True
        return False

    def _match_text_seq(self, *texts: str, advance: bool = True) -> bool:
        """Match the exact sequence of upper-cased token texts in `texts`.

        All-or-nothing: on any mismatch the cursor is rewound to where it was
        and False is returned. With `advance=False` the cursor is rewound even
        on success, making this a pure lookahead.
        """
        index = self._index
        for text in texts:
            if self._curr and self._curr.text.upper() == text:
                self._advance()
            else:
                self._retreat(index)
                return False

        if not advance:
            self._retreat(index)

        return True

    @t.overload
    def _replace_columns_with_dots(self, this: exp.Expression) -> exp.Expression:
        ...

    @t.overload
    def _replace_columns_with_dots(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        ...

    def _replace_columns_with_dots(self, this):
        """Recursively rewrite Column nodes as Dot chains.

        A Column carrying a `table` arg becomes Dot(this=table, expression=<name>);
        an unqualified Column is unwrapped to its inner expression. Presumably
        used where dotted path syntax, not column references, is expected —
        verify against callers.
        """
        if isinstance(this, exp.Dot):
            exp.replace_children(this, self._replace_columns_with_dots)
        elif isinstance(this, exp.Column):
            exp.replace_children(this, self._replace_columns_with_dots)
            table = this.args.get("table")
            this = (
                self.expression(exp.Dot, this=table, expression=this.this) if table else this.this
            )

        return this

    def _replace_lambda(
        self, node: t.Optional[exp.Expression], lambda_variables: t.Set[str]
    ) -> t.Optional[exp.Expression]:
        """Replace Column references to lambda parameters inside `node`.

        Any Column whose first part names a variable in `lambda_variables` is
        replaced by a plain identifier (or a Dot chain, if the column is
        table-qualified). Returns the possibly-rebound root node.
        """
        if not node:
            return node

        for column in node.find_all(exp.Column):
            if column.parts[0].name in lambda_variables:
                dot_or_id = column.to_dot() if column.table else column.this
                parent = column.parent

                # Climb to the outermost enclosing Dot and replace the whole chain.
                while isinstance(parent, exp.Dot):
                    if not isinstance(parent.parent, exp.Dot):
                        parent.replace(dot_or_id)
                        break
                    parent = parent.parent
                else:
                    # Loop never ran: the column is not nested inside a Dot chain.
                    if column is node:
                        # The column IS the root — rebind rather than replace in place.
                        node = dot_or_id
                    else:
                        column.replace(dot_or_id)
        return node
def parse_var_map(args: t.List) -> exp.StarMap | exp.VarMap:
    """Build a map expression from a flat, alternating key/value argument list.

    A single star argument produces a StarMap; otherwise arguments are paired
    up as (key, value) and wrapped into parallel key/value arrays of a VarMap.
    An odd number of arguments raises IndexError (a value is missing).
    """
    if len(args) == 1 and args[0].is_star:
        return exp.StarMap(this=args[0])

    # Pair consecutive arguments; indexing args[i + 1] intentionally fails
    # loudly when the list has odd length.
    pairs = [(args[i], args[i + 1]) for i in range(0, len(args), 2)]

    return exp.VarMap(
        keys=exp.Array(expressions=[key for key, _ in pairs]),
        values=exp.Array(expressions=[value for _, value in pairs]),
    )
60class Parser(metaclass=_Parser): 61 """ 62 Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree. 63 64 Args: 65 error_level: The desired error level. 66 Default: ErrorLevel.IMMEDIATE 67 error_message_context: Determines the amount of context to capture from a 68 query string when displaying the error message (in number of characters). 69 Default: 100 70 max_errors: Maximum number of error messages to include in a raised ParseError. 71 This is only relevant if error_level is ErrorLevel.RAISE. 72 Default: 3 73 """ 74 75 FUNCTIONS: t.Dict[str, t.Callable] = { 76 **{name: f.from_arg_list for f in exp.ALL_FUNCTIONS for name in f.sql_names()}, 77 "DATE_TO_DATE_STR": lambda args: exp.Cast( 78 this=seq_get(args, 0), 79 to=exp.DataType(this=exp.DataType.Type.TEXT), 80 ), 81 "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)), 82 "LIKE": parse_like, 83 "TIME_TO_TIME_STR": lambda args: exp.Cast( 84 this=seq_get(args, 0), 85 to=exp.DataType(this=exp.DataType.Type.TEXT), 86 ), 87 "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring( 88 this=exp.Cast( 89 this=seq_get(args, 0), 90 to=exp.DataType(this=exp.DataType.Type.TEXT), 91 ), 92 start=exp.Literal.number(1), 93 length=exp.Literal.number(10), 94 ), 95 "VAR_MAP": parse_var_map, 96 } 97 98 NO_PAREN_FUNCTIONS = { 99 TokenType.CURRENT_DATE: exp.CurrentDate, 100 TokenType.CURRENT_DATETIME: exp.CurrentDate, 101 TokenType.CURRENT_TIME: exp.CurrentTime, 102 TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp, 103 TokenType.CURRENT_USER: exp.CurrentUser, 104 } 105 106 NESTED_TYPE_TOKENS = { 107 TokenType.ARRAY, 108 TokenType.MAP, 109 TokenType.NULLABLE, 110 TokenType.STRUCT, 111 } 112 113 ENUM_TYPE_TOKENS = { 114 TokenType.ENUM, 115 } 116 117 TYPE_TOKENS = { 118 TokenType.BIT, 119 TokenType.BOOLEAN, 120 TokenType.TINYINT, 121 TokenType.UTINYINT, 122 TokenType.SMALLINT, 123 TokenType.USMALLINT, 124 TokenType.INT, 125 TokenType.UINT, 126 TokenType.BIGINT, 127 
TokenType.UBIGINT, 128 TokenType.INT128, 129 TokenType.UINT128, 130 TokenType.INT256, 131 TokenType.UINT256, 132 TokenType.FLOAT, 133 TokenType.DOUBLE, 134 TokenType.CHAR, 135 TokenType.NCHAR, 136 TokenType.VARCHAR, 137 TokenType.NVARCHAR, 138 TokenType.TEXT, 139 TokenType.MEDIUMTEXT, 140 TokenType.LONGTEXT, 141 TokenType.MEDIUMBLOB, 142 TokenType.LONGBLOB, 143 TokenType.BINARY, 144 TokenType.VARBINARY, 145 TokenType.JSON, 146 TokenType.JSONB, 147 TokenType.INTERVAL, 148 TokenType.TIME, 149 TokenType.TIMESTAMP, 150 TokenType.TIMESTAMPTZ, 151 TokenType.TIMESTAMPLTZ, 152 TokenType.DATETIME, 153 TokenType.DATETIME64, 154 TokenType.DATE, 155 TokenType.INT4RANGE, 156 TokenType.INT4MULTIRANGE, 157 TokenType.INT8RANGE, 158 TokenType.INT8MULTIRANGE, 159 TokenType.NUMRANGE, 160 TokenType.NUMMULTIRANGE, 161 TokenType.TSRANGE, 162 TokenType.TSMULTIRANGE, 163 TokenType.TSTZRANGE, 164 TokenType.TSTZMULTIRANGE, 165 TokenType.DATERANGE, 166 TokenType.DATEMULTIRANGE, 167 TokenType.DECIMAL, 168 TokenType.BIGDECIMAL, 169 TokenType.UUID, 170 TokenType.GEOGRAPHY, 171 TokenType.GEOMETRY, 172 TokenType.HLLSKETCH, 173 TokenType.HSTORE, 174 TokenType.PSEUDO_TYPE, 175 TokenType.SUPER, 176 TokenType.SERIAL, 177 TokenType.SMALLSERIAL, 178 TokenType.BIGSERIAL, 179 TokenType.XML, 180 TokenType.UNIQUEIDENTIFIER, 181 TokenType.USERDEFINED, 182 TokenType.MONEY, 183 TokenType.SMALLMONEY, 184 TokenType.ROWVERSION, 185 TokenType.IMAGE, 186 TokenType.VARIANT, 187 TokenType.OBJECT, 188 TokenType.INET, 189 TokenType.IPADDRESS, 190 TokenType.IPPREFIX, 191 TokenType.ENUM, 192 *NESTED_TYPE_TOKENS, 193 } 194 195 SUBQUERY_PREDICATES = { 196 TokenType.ANY: exp.Any, 197 TokenType.ALL: exp.All, 198 TokenType.EXISTS: exp.Exists, 199 TokenType.SOME: exp.Any, 200 } 201 202 RESERVED_KEYWORDS = { 203 *Tokenizer.SINGLE_TOKENS.values(), 204 TokenType.SELECT, 205 } 206 207 DB_CREATABLES = { 208 TokenType.DATABASE, 209 TokenType.SCHEMA, 210 TokenType.TABLE, 211 TokenType.VIEW, 212 TokenType.DICTIONARY, 213 } 214 215 
CREATABLES = { 216 TokenType.COLUMN, 217 TokenType.FUNCTION, 218 TokenType.INDEX, 219 TokenType.PROCEDURE, 220 *DB_CREATABLES, 221 } 222 223 # Tokens that can represent identifiers 224 ID_VAR_TOKENS = { 225 TokenType.VAR, 226 TokenType.ANTI, 227 TokenType.APPLY, 228 TokenType.ASC, 229 TokenType.AUTO_INCREMENT, 230 TokenType.BEGIN, 231 TokenType.CACHE, 232 TokenType.CASE, 233 TokenType.COLLATE, 234 TokenType.COMMAND, 235 TokenType.COMMENT, 236 TokenType.COMMIT, 237 TokenType.CONSTRAINT, 238 TokenType.DEFAULT, 239 TokenType.DELETE, 240 TokenType.DESC, 241 TokenType.DESCRIBE, 242 TokenType.DICTIONARY, 243 TokenType.DIV, 244 TokenType.END, 245 TokenType.EXECUTE, 246 TokenType.ESCAPE, 247 TokenType.FALSE, 248 TokenType.FIRST, 249 TokenType.FILTER, 250 TokenType.FORMAT, 251 TokenType.FULL, 252 TokenType.IF, 253 TokenType.IS, 254 TokenType.ISNULL, 255 TokenType.INTERVAL, 256 TokenType.KEEP, 257 TokenType.LEFT, 258 TokenType.LOAD, 259 TokenType.MERGE, 260 TokenType.NATURAL, 261 TokenType.NEXT, 262 TokenType.OFFSET, 263 TokenType.ORDINALITY, 264 TokenType.OVERWRITE, 265 TokenType.PARTITION, 266 TokenType.PERCENT, 267 TokenType.PIVOT, 268 TokenType.PRAGMA, 269 TokenType.RANGE, 270 TokenType.REFERENCES, 271 TokenType.RIGHT, 272 TokenType.ROW, 273 TokenType.ROWS, 274 TokenType.SEMI, 275 TokenType.SET, 276 TokenType.SETTINGS, 277 TokenType.SHOW, 278 TokenType.TEMPORARY, 279 TokenType.TOP, 280 TokenType.TRUE, 281 TokenType.UNIQUE, 282 TokenType.UNPIVOT, 283 TokenType.UPDATE, 284 TokenType.VOLATILE, 285 TokenType.WINDOW, 286 *CREATABLES, 287 *SUBQUERY_PREDICATES, 288 *TYPE_TOKENS, 289 *NO_PAREN_FUNCTIONS, 290 } 291 292 INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END} 293 294 TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - { 295 TokenType.APPLY, 296 TokenType.ASOF, 297 TokenType.FULL, 298 TokenType.LEFT, 299 TokenType.LOCK, 300 TokenType.NATURAL, 301 TokenType.OFFSET, 302 TokenType.RIGHT, 303 TokenType.WINDOW, 304 } 305 306 COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS} 307 
308 UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET} 309 310 TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"} 311 312 FUNC_TOKENS = { 313 TokenType.COMMAND, 314 TokenType.CURRENT_DATE, 315 TokenType.CURRENT_DATETIME, 316 TokenType.CURRENT_TIMESTAMP, 317 TokenType.CURRENT_TIME, 318 TokenType.CURRENT_USER, 319 TokenType.FILTER, 320 TokenType.FIRST, 321 TokenType.FORMAT, 322 TokenType.GLOB, 323 TokenType.IDENTIFIER, 324 TokenType.INDEX, 325 TokenType.ISNULL, 326 TokenType.ILIKE, 327 TokenType.LIKE, 328 TokenType.MERGE, 329 TokenType.OFFSET, 330 TokenType.PRIMARY_KEY, 331 TokenType.RANGE, 332 TokenType.REPLACE, 333 TokenType.RLIKE, 334 TokenType.ROW, 335 TokenType.UNNEST, 336 TokenType.VAR, 337 TokenType.LEFT, 338 TokenType.RIGHT, 339 TokenType.DATE, 340 TokenType.DATETIME, 341 TokenType.TABLE, 342 TokenType.TIMESTAMP, 343 TokenType.TIMESTAMPTZ, 344 TokenType.WINDOW, 345 TokenType.XOR, 346 *TYPE_TOKENS, 347 *SUBQUERY_PREDICATES, 348 } 349 350 CONJUNCTION = { 351 TokenType.AND: exp.And, 352 TokenType.OR: exp.Or, 353 } 354 355 EQUALITY = { 356 TokenType.EQ: exp.EQ, 357 TokenType.NEQ: exp.NEQ, 358 TokenType.NULLSAFE_EQ: exp.NullSafeEQ, 359 } 360 361 COMPARISON = { 362 TokenType.GT: exp.GT, 363 TokenType.GTE: exp.GTE, 364 TokenType.LT: exp.LT, 365 TokenType.LTE: exp.LTE, 366 } 367 368 BITWISE = { 369 TokenType.AMP: exp.BitwiseAnd, 370 TokenType.CARET: exp.BitwiseXor, 371 TokenType.PIPE: exp.BitwiseOr, 372 TokenType.DPIPE: exp.DPipe, 373 } 374 375 TERM = { 376 TokenType.DASH: exp.Sub, 377 TokenType.PLUS: exp.Add, 378 TokenType.MOD: exp.Mod, 379 TokenType.COLLATE: exp.Collate, 380 } 381 382 FACTOR = { 383 TokenType.DIV: exp.IntDiv, 384 TokenType.LR_ARROW: exp.Distance, 385 TokenType.SLASH: exp.Div, 386 TokenType.STAR: exp.Mul, 387 } 388 389 TIMESTAMPS = { 390 TokenType.TIME, 391 TokenType.TIMESTAMP, 392 TokenType.TIMESTAMPTZ, 393 TokenType.TIMESTAMPLTZ, 394 } 395 396 SET_OPERATIONS = { 397 TokenType.UNION, 398 TokenType.INTERSECT, 399 TokenType.EXCEPT, 400 } 401 402 
JOIN_METHODS = { 403 TokenType.NATURAL, 404 TokenType.ASOF, 405 } 406 407 JOIN_SIDES = { 408 TokenType.LEFT, 409 TokenType.RIGHT, 410 TokenType.FULL, 411 } 412 413 JOIN_KINDS = { 414 TokenType.INNER, 415 TokenType.OUTER, 416 TokenType.CROSS, 417 TokenType.SEMI, 418 TokenType.ANTI, 419 } 420 421 JOIN_HINTS: t.Set[str] = set() 422 423 LAMBDAS = { 424 TokenType.ARROW: lambda self, expressions: self.expression( 425 exp.Lambda, 426 this=self._replace_lambda( 427 self._parse_conjunction(), 428 {node.name for node in expressions}, 429 ), 430 expressions=expressions, 431 ), 432 TokenType.FARROW: lambda self, expressions: self.expression( 433 exp.Kwarg, 434 this=exp.var(expressions[0].name), 435 expression=self._parse_conjunction(), 436 ), 437 } 438 439 COLUMN_OPERATORS = { 440 TokenType.DOT: None, 441 TokenType.DCOLON: lambda self, this, to: self.expression( 442 exp.Cast if self.STRICT_CAST else exp.TryCast, 443 this=this, 444 to=to, 445 ), 446 TokenType.ARROW: lambda self, this, path: self.expression( 447 exp.JSONExtract, 448 this=this, 449 expression=path, 450 ), 451 TokenType.DARROW: lambda self, this, path: self.expression( 452 exp.JSONExtractScalar, 453 this=this, 454 expression=path, 455 ), 456 TokenType.HASH_ARROW: lambda self, this, path: self.expression( 457 exp.JSONBExtract, 458 this=this, 459 expression=path, 460 ), 461 TokenType.DHASH_ARROW: lambda self, this, path: self.expression( 462 exp.JSONBExtractScalar, 463 this=this, 464 expression=path, 465 ), 466 TokenType.PLACEHOLDER: lambda self, this, key: self.expression( 467 exp.JSONBContains, 468 this=this, 469 expression=key, 470 ), 471 } 472 473 EXPRESSION_PARSERS = { 474 exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 475 exp.Column: lambda self: self._parse_column(), 476 exp.Condition: lambda self: self._parse_conjunction(), 477 exp.DataType: lambda self: self._parse_types(), 478 exp.Expression: lambda self: self._parse_statement(), 479 exp.From: lambda self: 
self._parse_from(), 480 exp.Group: lambda self: self._parse_group(), 481 exp.Having: lambda self: self._parse_having(), 482 exp.Identifier: lambda self: self._parse_id_var(), 483 exp.Join: lambda self: self._parse_join(), 484 exp.Lambda: lambda self: self._parse_lambda(), 485 exp.Lateral: lambda self: self._parse_lateral(), 486 exp.Limit: lambda self: self._parse_limit(), 487 exp.Offset: lambda self: self._parse_offset(), 488 exp.Order: lambda self: self._parse_order(), 489 exp.Ordered: lambda self: self._parse_ordered(), 490 exp.Properties: lambda self: self._parse_properties(), 491 exp.Qualify: lambda self: self._parse_qualify(), 492 exp.Returning: lambda self: self._parse_returning(), 493 exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY), 494 exp.Table: lambda self: self._parse_table_parts(), 495 exp.TableAlias: lambda self: self._parse_table_alias(), 496 exp.Where: lambda self: self._parse_where(), 497 exp.Window: lambda self: self._parse_named_window(), 498 exp.With: lambda self: self._parse_with(), 499 "JOIN_TYPE": lambda self: self._parse_join_parts(), 500 } 501 502 STATEMENT_PARSERS = { 503 TokenType.ALTER: lambda self: self._parse_alter(), 504 TokenType.BEGIN: lambda self: self._parse_transaction(), 505 TokenType.CACHE: lambda self: self._parse_cache(), 506 TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(), 507 TokenType.COMMENT: lambda self: self._parse_comment(), 508 TokenType.CREATE: lambda self: self._parse_create(), 509 TokenType.DELETE: lambda self: self._parse_delete(), 510 TokenType.DESC: lambda self: self._parse_describe(), 511 TokenType.DESCRIBE: lambda self: self._parse_describe(), 512 TokenType.DROP: lambda self: self._parse_drop(), 513 TokenType.FROM: lambda self: exp.select("*").from_( 514 t.cast(exp.From, self._parse_from(skip_from_token=True)) 515 ), 516 TokenType.INSERT: lambda self: self._parse_insert(), 517 TokenType.LOAD: lambda self: self._parse_load(), 518 TokenType.MERGE: lambda self: 
self._parse_merge(), 519 TokenType.PIVOT: lambda self: self._parse_simplified_pivot(), 520 TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()), 521 TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(), 522 TokenType.SET: lambda self: self._parse_set(), 523 TokenType.UNCACHE: lambda self: self._parse_uncache(), 524 TokenType.UPDATE: lambda self: self._parse_update(), 525 TokenType.USE: lambda self: self.expression( 526 exp.Use, 527 kind=self._match_texts(("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA")) 528 and exp.var(self._prev.text), 529 this=self._parse_table(schema=False), 530 ), 531 } 532 533 UNARY_PARSERS = { 534 TokenType.PLUS: lambda self: self._parse_unary(), # Unary + is handled as a no-op 535 TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()), 536 TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()), 537 TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()), 538 } 539 540 PRIMARY_PARSERS = { 541 TokenType.STRING: lambda self, token: self.expression( 542 exp.Literal, this=token.text, is_string=True 543 ), 544 TokenType.NUMBER: lambda self, token: self.expression( 545 exp.Literal, this=token.text, is_string=False 546 ), 547 TokenType.STAR: lambda self, _: self.expression( 548 exp.Star, **{"except": self._parse_except(), "replace": self._parse_replace()} 549 ), 550 TokenType.NULL: lambda self, _: self.expression(exp.Null), 551 TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True), 552 TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False), 553 TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text), 554 TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text), 555 TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text), 556 TokenType.INTRODUCER: lambda self, token: 
self._parse_introducer(token), 557 TokenType.NATIONAL_STRING: lambda self, token: self.expression( 558 exp.National, this=token.text 559 ), 560 TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text), 561 TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(), 562 } 563 564 PLACEHOLDER_PARSERS = { 565 TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder), 566 TokenType.PARAMETER: lambda self: self._parse_parameter(), 567 TokenType.COLON: lambda self: self.expression(exp.Placeholder, this=self._prev.text) 568 if self._match_set((TokenType.NUMBER, TokenType.VAR)) 569 else None, 570 } 571 572 RANGE_PARSERS = { 573 TokenType.BETWEEN: lambda self, this: self._parse_between(this), 574 TokenType.GLOB: binary_range_parser(exp.Glob), 575 TokenType.ILIKE: binary_range_parser(exp.ILike), 576 TokenType.IN: lambda self, this: self._parse_in(this), 577 TokenType.IRLIKE: binary_range_parser(exp.RegexpILike), 578 TokenType.IS: lambda self, this: self._parse_is(this), 579 TokenType.LIKE: binary_range_parser(exp.Like), 580 TokenType.OVERLAPS: binary_range_parser(exp.Overlaps), 581 TokenType.RLIKE: binary_range_parser(exp.RegexpLike), 582 TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo), 583 } 584 585 PROPERTY_PARSERS: t.Dict[str, t.Callable] = { 586 "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty), 587 "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty), 588 "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(), 589 "CHARACTER SET": lambda self: self._parse_character_set(), 590 "CHECKSUM": lambda self: self._parse_checksum(), 591 "CLUSTER BY": lambda self: self._parse_cluster(), 592 "CLUSTERED": lambda self: self._parse_clustered_by(), 593 "COLLATE": lambda self: self._parse_property_assignment(exp.CollateProperty), 594 "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty), 595 "COPY": lambda self: 
self._parse_copy_property(), 596 "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs), 597 "DEFINER": lambda self: self._parse_definer(), 598 "DETERMINISTIC": lambda self: self.expression( 599 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 600 ), 601 "DISTKEY": lambda self: self._parse_distkey(), 602 "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty), 603 "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty), 604 "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty), 605 "EXTERNAL": lambda self: self.expression(exp.ExternalProperty), 606 "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs), 607 "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 608 "FREESPACE": lambda self: self._parse_freespace(), 609 "HEAP": lambda self: self.expression(exp.HeapProperty), 610 "IMMUTABLE": lambda self: self.expression( 611 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 612 ), 613 "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs), 614 "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty), 615 "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"), 616 "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"), 617 "LIKE": lambda self: self._parse_create_like(), 618 "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty), 619 "LOCK": lambda self: self._parse_locking(), 620 "LOCKING": lambda self: self._parse_locking(), 621 "LOG": lambda self, **kwargs: self._parse_log(**kwargs), 622 "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty), 623 "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs), 624 "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True), 625 "NO": lambda self: self._parse_no_property(), 626 "ON": lambda self: self._parse_on_property(), 627 "ORDER BY": lambda self: 
self._parse_order(skip_order_token=True), 628 "PARTITION BY": lambda self: self._parse_partitioned_by(), 629 "PARTITIONED BY": lambda self: self._parse_partitioned_by(), 630 "PARTITIONED_BY": lambda self: self._parse_partitioned_by(), 631 "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True), 632 "RANGE": lambda self: self._parse_dict_range(this="RANGE"), 633 "RETURNS": lambda self: self._parse_returns(), 634 "ROW": lambda self: self._parse_row(), 635 "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty), 636 "SET": lambda self: self.expression(exp.SetProperty, multi=False), 637 "SETTINGS": lambda self: self.expression( 638 exp.SettingsProperty, expressions=self._parse_csv(self._parse_set_item) 639 ), 640 "SORTKEY": lambda self: self._parse_sortkey(), 641 "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"), 642 "STABLE": lambda self: self.expression( 643 exp.StabilityProperty, this=exp.Literal.string("STABLE") 644 ), 645 "STORED": lambda self: self._parse_stored(), 646 "TBLPROPERTIES": lambda self: self._parse_wrapped_csv(self._parse_property), 647 "TEMP": lambda self: self.expression(exp.TemporaryProperty), 648 "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty), 649 "TO": lambda self: self._parse_to_table(), 650 "TRANSIENT": lambda self: self.expression(exp.TransientProperty), 651 "TTL": lambda self: self._parse_ttl(), 652 "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 653 "VOLATILE": lambda self: self._parse_volatile_property(), 654 "WITH": lambda self: self._parse_with_property(), 655 } 656 657 CONSTRAINT_PARSERS = { 658 "AUTOINCREMENT": lambda self: self._parse_auto_increment(), 659 "AUTO_INCREMENT": lambda self: self._parse_auto_increment(), 660 "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False), 661 "CHARACTER SET": lambda self: self.expression( 662 exp.CharacterSetColumnConstraint, this=self._parse_var_or_string() 663 ), 
664 "CHECK": lambda self: self.expression( 665 exp.CheckColumnConstraint, this=self._parse_wrapped(self._parse_conjunction) 666 ), 667 "COLLATE": lambda self: self.expression( 668 exp.CollateColumnConstraint, this=self._parse_var() 669 ), 670 "COMMENT": lambda self: self.expression( 671 exp.CommentColumnConstraint, this=self._parse_string() 672 ), 673 "COMPRESS": lambda self: self._parse_compress(), 674 "DEFAULT": lambda self: self.expression( 675 exp.DefaultColumnConstraint, this=self._parse_bitwise() 676 ), 677 "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()), 678 "FOREIGN KEY": lambda self: self._parse_foreign_key(), 679 "FORMAT": lambda self: self.expression( 680 exp.DateFormatColumnConstraint, this=self._parse_var_or_string() 681 ), 682 "GENERATED": lambda self: self._parse_generated_as_identity(), 683 "IDENTITY": lambda self: self._parse_auto_increment(), 684 "INLINE": lambda self: self._parse_inline(), 685 "LIKE": lambda self: self._parse_create_like(), 686 "NOT": lambda self: self._parse_not_constraint(), 687 "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True), 688 "ON": lambda self: self._match(TokenType.UPDATE) 689 and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function()), 690 "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()), 691 "PRIMARY KEY": lambda self: self._parse_primary_key(), 692 "REFERENCES": lambda self: self._parse_references(match=False), 693 "TITLE": lambda self: self.expression( 694 exp.TitleColumnConstraint, this=self._parse_var_or_string() 695 ), 696 "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]), 697 "UNIQUE": lambda self: self._parse_unique(), 698 "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint), 699 } 700 701 ALTER_PARSERS = { 702 "ADD": lambda self: self._parse_alter_table_add(), 703 "ALTER": lambda self: 
self._parse_alter_table_alter(), 704 "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()), 705 "DROP": lambda self: self._parse_alter_table_drop(), 706 "RENAME": lambda self: self._parse_alter_table_rename(), 707 } 708 709 SCHEMA_UNNAMED_CONSTRAINTS = {"CHECK", "FOREIGN KEY", "LIKE", "PRIMARY KEY", "UNIQUE"} 710 711 NO_PAREN_FUNCTION_PARSERS = { 712 TokenType.ANY: lambda self: self.expression(exp.Any, this=self._parse_bitwise()), 713 TokenType.CASE: lambda self: self._parse_case(), 714 TokenType.IF: lambda self: self._parse_if(), 715 TokenType.NEXT_VALUE_FOR: lambda self: self.expression( 716 exp.NextValueFor, 717 this=self._parse_column(), 718 order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order), 719 ), 720 } 721 722 FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"} 723 724 FUNCTION_PARSERS = { 725 "ANY_VALUE": lambda self: self._parse_any_value(), 726 "CAST": lambda self: self._parse_cast(self.STRICT_CAST), 727 "CONCAT": lambda self: self._parse_concat(), 728 "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST), 729 "DECODE": lambda self: self._parse_decode(), 730 "EXTRACT": lambda self: self._parse_extract(), 731 "JSON_OBJECT": lambda self: self._parse_json_object(), 732 "LOG": lambda self: self._parse_logarithm(), 733 "MATCH": lambda self: self._parse_match_against(), 734 "OPENJSON": lambda self: self._parse_open_json(), 735 "POSITION": lambda self: self._parse_position(), 736 "SAFE_CAST": lambda self: self._parse_cast(False), 737 "STRING_AGG": lambda self: self._parse_string_agg(), 738 "SUBSTRING": lambda self: self._parse_substring(), 739 "TRIM": lambda self: self._parse_trim(), 740 "TRY_CAST": lambda self: self._parse_cast(False), 741 "TRY_CONVERT": lambda self: self._parse_convert(False), 742 } 743 744 QUERY_MODIFIER_PARSERS = { 745 TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()), 746 TokenType.WHERE: lambda self: ("where", self._parse_where()), 747 TokenType.GROUP_BY: lambda 
self: ("group", self._parse_group()), 748 TokenType.HAVING: lambda self: ("having", self._parse_having()), 749 TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()), 750 TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()), 751 TokenType.ORDER_BY: lambda self: ("order", self._parse_order()), 752 TokenType.LIMIT: lambda self: ("limit", self._parse_limit()), 753 TokenType.FETCH: lambda self: ("limit", self._parse_limit()), 754 TokenType.OFFSET: lambda self: ("offset", self._parse_offset()), 755 TokenType.FOR: lambda self: ("locks", self._parse_locks()), 756 TokenType.LOCK: lambda self: ("locks", self._parse_locks()), 757 TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 758 TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 759 TokenType.CLUSTER_BY: lambda self: ( 760 "cluster", 761 self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 762 ), 763 TokenType.DISTRIBUTE_BY: lambda self: ( 764 "distribute", 765 self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY), 766 ), 767 TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)), 768 } 769 770 SET_PARSERS = { 771 "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"), 772 "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"), 773 "SESSION": lambda self: self._parse_set_item_assignment("SESSION"), 774 "TRANSACTION": lambda self: self._parse_set_transaction(), 775 } 776 777 SHOW_PARSERS: t.Dict[str, t.Callable] = {} 778 779 TYPE_LITERAL_PARSERS: t.Dict[exp.DataType.Type, t.Callable] = {} 780 781 MODIFIABLES = (exp.Subquery, exp.Subqueryable, exp.Table) 782 783 DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN} 784 785 PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE} 786 787 TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"} 788 TRANSACTION_CHARACTERISTICS = { 789 "ISOLATION LEVEL REPEATABLE READ", 790 
"ISOLATION LEVEL READ COMMITTED", 791 "ISOLATION LEVEL READ UNCOMMITTED", 792 "ISOLATION LEVEL SERIALIZABLE", 793 "READ WRITE", 794 "READ ONLY", 795 } 796 797 INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"} 798 799 CLONE_KINDS = {"TIMESTAMP", "OFFSET", "STATEMENT"} 800 801 TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE} 802 803 WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS} 804 WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER} 805 WINDOW_SIDES = {"FOLLOWING", "PRECEDING"} 806 807 ADD_CONSTRAINT_TOKENS = {TokenType.CONSTRAINT, TokenType.PRIMARY_KEY, TokenType.FOREIGN_KEY} 808 809 STRICT_CAST = True 810 811 # A NULL arg in CONCAT yields NULL by default 812 CONCAT_NULL_OUTPUTS_STRING = False 813 814 PREFIXED_PIVOT_COLUMNS = False 815 IDENTIFY_PIVOT_STRINGS = False 816 817 LOG_BASE_FIRST = True 818 LOG_DEFAULTS_TO_LN = False 819 820 __slots__ = ( 821 "error_level", 822 "error_message_context", 823 "max_errors", 824 "sql", 825 "errors", 826 "_tokens", 827 "_index", 828 "_curr", 829 "_next", 830 "_prev", 831 "_prev_comments", 832 ) 833 834 # Autofilled 835 INDEX_OFFSET: int = 0 836 UNNEST_COLUMN_ONLY: bool = False 837 ALIAS_POST_TABLESAMPLE: bool = False 838 STRICT_STRING_CONCAT = False 839 NULL_ORDERING: str = "nulls_are_small" 840 SHOW_TRIE: t.Dict = {} 841 SET_TRIE: t.Dict = {} 842 FORMAT_MAPPING: t.Dict[str, str] = {} 843 FORMAT_TRIE: t.Dict = {} 844 TIME_MAPPING: t.Dict[str, str] = {} 845 TIME_TRIE: t.Dict = {} 846 847 def __init__( 848 self, 849 error_level: t.Optional[ErrorLevel] = None, 850 error_message_context: int = 100, 851 max_errors: int = 3, 852 ): 853 self.error_level = error_level or ErrorLevel.IMMEDIATE 854 self.error_message_context = error_message_context 855 self.max_errors = max_errors 856 self.reset() 857 858 def reset(self): 859 self.sql = "" 860 self.errors = [] 861 self._tokens = [] 862 self._index = 0 863 self._curr = None 864 self._next = None 865 self._prev = None 866 
self._prev_comments = None 867 868 def parse( 869 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 870 ) -> t.List[t.Optional[exp.Expression]]: 871 """ 872 Parses a list of tokens and returns a list of syntax trees, one tree 873 per parsed SQL statement. 874 875 Args: 876 raw_tokens: The list of tokens. 877 sql: The original SQL string, used to produce helpful debug messages. 878 879 Returns: 880 The list of the produced syntax trees. 881 """ 882 return self._parse( 883 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 884 ) 885 886 def parse_into( 887 self, 888 expression_types: exp.IntoType, 889 raw_tokens: t.List[Token], 890 sql: t.Optional[str] = None, 891 ) -> t.List[t.Optional[exp.Expression]]: 892 """ 893 Parses a list of tokens into a given Expression type. If a collection of Expression 894 types is given instead, this method will try to parse the token list into each one 895 of them, stopping at the first for which the parsing succeeds. 896 897 Args: 898 expression_types: The expression type(s) to try and parse the token list into. 899 raw_tokens: The list of tokens. 900 sql: The original SQL string, used to produce helpful debug messages. 901 902 Returns: 903 The target Expression. 
904 """ 905 errors = [] 906 for expression_type in ensure_list(expression_types): 907 parser = self.EXPRESSION_PARSERS.get(expression_type) 908 if not parser: 909 raise TypeError(f"No parser registered for {expression_type}") 910 911 try: 912 return self._parse(parser, raw_tokens, sql) 913 except ParseError as e: 914 e.errors[0]["into_expression"] = expression_type 915 errors.append(e) 916 917 raise ParseError( 918 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 919 errors=merge_errors(errors), 920 ) from errors[-1] 921 922 def _parse( 923 self, 924 parse_method: t.Callable[[Parser], t.Optional[exp.Expression]], 925 raw_tokens: t.List[Token], 926 sql: t.Optional[str] = None, 927 ) -> t.List[t.Optional[exp.Expression]]: 928 self.reset() 929 self.sql = sql or "" 930 931 total = len(raw_tokens) 932 chunks: t.List[t.List[Token]] = [[]] 933 934 for i, token in enumerate(raw_tokens): 935 if token.token_type == TokenType.SEMICOLON: 936 if i < total - 1: 937 chunks.append([]) 938 else: 939 chunks[-1].append(token) 940 941 expressions = [] 942 943 for tokens in chunks: 944 self._index = -1 945 self._tokens = tokens 946 self._advance() 947 948 expressions.append(parse_method(self)) 949 950 if self._index < len(self._tokens): 951 self.raise_error("Invalid expression / Unexpected token") 952 953 self.check_errors() 954 955 return expressions 956 957 def check_errors(self) -> None: 958 """Logs or raises any found errors, depending on the chosen error level setting.""" 959 if self.error_level == ErrorLevel.WARN: 960 for error in self.errors: 961 logger.error(str(error)) 962 elif self.error_level == ErrorLevel.RAISE and self.errors: 963 raise ParseError( 964 concat_messages(self.errors, self.max_errors), 965 errors=merge_errors(self.errors), 966 ) 967 968 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 969 """ 970 Appends an error in the list of recorded errors or raises it, depending on the chosen 971 error level setting. 
972 """ 973 token = token or self._curr or self._prev or Token.string("") 974 start = token.start 975 end = token.end + 1 976 start_context = self.sql[max(start - self.error_message_context, 0) : start] 977 highlight = self.sql[start:end] 978 end_context = self.sql[end : end + self.error_message_context] 979 980 error = ParseError.new( 981 f"{message}. Line {token.line}, Col: {token.col}.\n" 982 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 983 description=message, 984 line=token.line, 985 col=token.col, 986 start_context=start_context, 987 highlight=highlight, 988 end_context=end_context, 989 ) 990 991 if self.error_level == ErrorLevel.IMMEDIATE: 992 raise error 993 994 self.errors.append(error) 995 996 def expression( 997 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 998 ) -> E: 999 """ 1000 Creates a new, validated Expression. 1001 1002 Args: 1003 exp_class: The expression class to instantiate. 1004 comments: An optional list of comments to attach to the expression. 1005 kwargs: The arguments to set for the expression along with their respective values. 1006 1007 Returns: 1008 The target expression. 1009 """ 1010 instance = exp_class(**kwargs) 1011 instance.add_comments(comments) if comments else self._add_comments(instance) 1012 return self.validate_expression(instance) 1013 1014 def _add_comments(self, expression: t.Optional[exp.Expression]) -> None: 1015 if expression and self._prev_comments: 1016 expression.add_comments(self._prev_comments) 1017 self._prev_comments = None 1018 1019 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1020 """ 1021 Validates an Expression, making sure that all its mandatory arguments are set. 1022 1023 Args: 1024 expression: The expression to validate. 1025 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1026 1027 Returns: 1028 The validated expression. 
1029 """ 1030 if self.error_level != ErrorLevel.IGNORE: 1031 for error_message in expression.error_messages(args): 1032 self.raise_error(error_message) 1033 1034 return expression 1035 1036 def _find_sql(self, start: Token, end: Token) -> str: 1037 return self.sql[start.start : end.end + 1] 1038 1039 def _advance(self, times: int = 1) -> None: 1040 self._index += times 1041 self._curr = seq_get(self._tokens, self._index) 1042 self._next = seq_get(self._tokens, self._index + 1) 1043 1044 if self._index > 0: 1045 self._prev = self._tokens[self._index - 1] 1046 self._prev_comments = self._prev.comments 1047 else: 1048 self._prev = None 1049 self._prev_comments = None 1050 1051 def _retreat(self, index: int) -> None: 1052 if index != self._index: 1053 self._advance(index - self._index) 1054 1055 def _parse_command(self) -> exp.Command: 1056 return self.expression(exp.Command, this=self._prev.text, expression=self._parse_string()) 1057 1058 def _parse_comment(self, allow_exists: bool = True) -> exp.Expression: 1059 start = self._prev 1060 exists = self._parse_exists() if allow_exists else None 1061 1062 self._match(TokenType.ON) 1063 1064 kind = self._match_set(self.CREATABLES) and self._prev 1065 if not kind: 1066 return self._parse_as_command(start) 1067 1068 if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1069 this = self._parse_user_defined_function(kind=kind.token_type) 1070 elif kind.token_type == TokenType.TABLE: 1071 this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS) 1072 elif kind.token_type == TokenType.COLUMN: 1073 this = self._parse_column() 1074 else: 1075 this = self._parse_id_var() 1076 1077 self._match(TokenType.IS) 1078 1079 return self.expression( 1080 exp.Comment, this=this, kind=kind.text, expression=self._parse_string(), exists=exists 1081 ) 1082 1083 def _parse_to_table( 1084 self, 1085 ) -> exp.ToTableProperty: 1086 table = self._parse_table_parts(schema=True) 1087 return self.expression(exp.ToTableProperty, 
this=table) 1088 1089 # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl 1090 def _parse_ttl(self) -> exp.Expression: 1091 def _parse_ttl_action() -> t.Optional[exp.Expression]: 1092 this = self._parse_bitwise() 1093 1094 if self._match_text_seq("DELETE"): 1095 return self.expression(exp.MergeTreeTTLAction, this=this, delete=True) 1096 if self._match_text_seq("RECOMPRESS"): 1097 return self.expression( 1098 exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise() 1099 ) 1100 if self._match_text_seq("TO", "DISK"): 1101 return self.expression( 1102 exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string() 1103 ) 1104 if self._match_text_seq("TO", "VOLUME"): 1105 return self.expression( 1106 exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string() 1107 ) 1108 1109 return this 1110 1111 expressions = self._parse_csv(_parse_ttl_action) 1112 where = self._parse_where() 1113 group = self._parse_group() 1114 1115 aggregates = None 1116 if group and self._match(TokenType.SET): 1117 aggregates = self._parse_csv(self._parse_set_item) 1118 1119 return self.expression( 1120 exp.MergeTreeTTL, 1121 expressions=expressions, 1122 where=where, 1123 group=group, 1124 aggregates=aggregates, 1125 ) 1126 1127 def _parse_statement(self) -> t.Optional[exp.Expression]: 1128 if self._curr is None: 1129 return None 1130 1131 if self._match_set(self.STATEMENT_PARSERS): 1132 return self.STATEMENT_PARSERS[self._prev.token_type](self) 1133 1134 if self._match_set(Tokenizer.COMMANDS): 1135 return self._parse_command() 1136 1137 expression = self._parse_expression() 1138 expression = self._parse_set_operations(expression) if expression else self._parse_select() 1139 return self._parse_query_modifiers(expression) 1140 1141 def _parse_drop(self) -> exp.Drop | exp.Command: 1142 start = self._prev 1143 temporary = self._match(TokenType.TEMPORARY) 1144 materialized = self._match_text_seq("MATERIALIZED") 1145 1146 kind = 
self._match_set(self.CREATABLES) and self._prev.text
        if not kind:
            # Unknown creatable kind: degrade to an opaque command.
            return self._parse_as_command(start)

        return self.expression(
            exp.Drop,
            comments=start.comments,
            exists=self._parse_exists(),
            this=self._parse_table(schema=True),
            kind=kind,
            temporary=temporary,
            materialized=materialized,
            cascade=self._match_text_seq("CASCADE"),
            constraints=self._match_text_seq("CONSTRAINTS"),
            purge=self._match_text_seq("PURGE"),
        )

    def _parse_exists(self, not_: bool = False) -> t.Optional[bool]:
        # Matches IF [NOT] EXISTS; truthy only when the full sequence matched.
        return (
            self._match(TokenType.IF)
            and (not not_ or self._match(TokenType.NOT))
            and self._match(TokenType.EXISTS)
        )

    def _parse_create(self) -> exp.Create | exp.Command:
        """Parse CREATE statements for all creatable kinds (table, view, function, ...)."""
        # Note: this can't be None because we've matched a statement parser
        start = self._prev
        replace = start.text.upper() == "REPLACE" or self._match_pair(
            TokenType.OR, TokenType.REPLACE
        )
        unique = self._match(TokenType.UNIQUE)

        if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False):
            # CREATE TABLE FUNCTION: skip TABLE so FUNCTION becomes the create token.
            self._advance()

        properties = None
        create_token = self._match_set(self.CREATABLES) and self._prev

        if not create_token:
            # exp.Properties.Location.POST_CREATE
            properties = self._parse_properties()
            create_token = self._match_set(self.CREATABLES) and self._prev

        if not properties or not create_token:
            return self._parse_as_command(start)

        exists = self._parse_exists(not_=True)
        this = None
        expression: t.Optional[exp.Expression] = None
        indexes = None
        no_schema_binding = None
        begin = None
        clone = None

        def extend_props(temp_props: t.Optional[exp.Properties]) -> None:
            # Accumulate property lists parsed at different syntactic locations.
            nonlocal properties
            if properties and temp_props:
                properties.expressions.extend(temp_props.expressions)
            elif temp_props:
                properties = temp_props

        if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=create_token.token_type)

            # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature)
            extend_props(self._parse_properties())

            self._match(TokenType.ALIAS)

            if self._match(TokenType.COMMAND):
                expression = self._parse_as_command(self._prev)
            else:
                begin = self._match(TokenType.BEGIN)
                return_ = self._match_text_seq("RETURN")
                expression = self._parse_statement()

                if return_:
                    expression = self.expression(exp.Return, this=expression)
        elif create_token.token_type == TokenType.INDEX:
            this = self._parse_index(index=self._parse_id_var())
        elif create_token.token_type in self.DB_CREATABLES:
            table_parts = self._parse_table_parts(schema=True)

            # exp.Properties.Location.POST_NAME
            self._match(TokenType.COMMA)
            extend_props(self._parse_properties(before=True))

            this = self._parse_schema(this=table_parts)

            # exp.Properties.Location.POST_SCHEMA and POST_WITH
            extend_props(self._parse_properties())

            self._match(TokenType.ALIAS)
            if not self._match_set(self.DDL_SELECT_TOKENS, advance=False):
                # exp.Properties.Location.POST_ALIAS
                extend_props(self._parse_properties())

            expression = self._parse_ddl_select()

            if create_token.token_type == TokenType.TABLE:
                # exp.Properties.Location.POST_EXPRESSION
                extend_props(self._parse_properties())

                indexes = []
                while True:
                    index = self._parse_index()

                    # exp.Properties.Location.POST_INDEX
                    extend_props(self._parse_properties())

                    if not index:
                        break
                    else:
                        self._match(TokenType.COMMA)
                        indexes.append(index)
            elif create_token.token_type == TokenType.VIEW:
                if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"):
                    no_schema_binding = True

        if self._match_text_seq("CLONE"):
            clone = self._parse_table(schema=True)
            when = self._match_texts({"AT", "BEFORE"}) and self._prev.text.upper()
            clone_kind = (
                self._match(TokenType.L_PAREN)
                and self._match_texts(self.CLONE_KINDS)
                and self._prev.text.upper()
            )
            clone_expression = self._match(TokenType.FARROW) and self._parse_bitwise()
            self._match(TokenType.R_PAREN)
            clone = self.expression(
                exp.Clone, this=clone, when=when, kind=clone_kind, expression=clone_expression
            )

        return self.expression(
            exp.Create,
            this=this,
            kind=create_token.text,
            replace=replace,
            unique=unique,
            expression=expression,
            exists=exists,
            properties=properties,
            indexes=indexes,
            no_schema_binding=no_schema_binding,
            begin=begin,
            clone=clone,
        )

    def _parse_property_before(self) -> t.Optional[exp.Expression]:
        # only used for teradata currently
        self._match(TokenType.COMMA)

        # Collect the modifier keywords preceding the property name; only truthy
        # entries are forwarded to the property parser.
        kwargs = {
            "no": self._match_text_seq("NO"),
            "dual": self._match_text_seq("DUAL"),
            "before": self._match_text_seq("BEFORE"),
            "default": self._match_text_seq("DEFAULT"),
            "local": (self._match_text_seq("LOCAL") and "LOCAL")
            or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"),
            "after": self._match_text_seq("AFTER"),
            "minimum": self._match_texts(("MIN", "MINIMUM")),
            "maximum": self._match_texts(("MAX", "MAXIMUM")),
        }

        if self._match_texts(self.PROPERTY_PARSERS):
            parser = self.PROPERTY_PARSERS[self._prev.text.upper()]
            try:
                return parser(self, **{k: v for k, v in kwargs.items() if v})
            except TypeError:
                # The parser's signature rejected the collected modifiers.
                self.raise_error(f"Cannot parse property '{self._prev.text}'")

        return None

    def _parse_property(self) -> t.Optional[exp.Expression]:
        if self._match_texts(self.PROPERTY_PARSERS):
            return self.PROPERTY_PARSERS[self._prev.text.upper()](self)

        if self._match_pair(TokenType.DEFAULT, 
TokenType.CHARACTER_SET):
            return self._parse_character_set(default=True)

        if self._match_text_seq("COMPOUND", "SORTKEY"):
            return self._parse_sortkey(compound=True)

        if self._match_text_seq("SQL", "SECURITY"):
            return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER"))

        # Generic "<key> = <value>" property (key is an identifier or string).
        assignment = self._match_pair(
            TokenType.VAR, TokenType.EQ, advance=False
        ) or self._match_pair(TokenType.STRING, TokenType.EQ, advance=False)

        if assignment:
            key = self._parse_var_or_string()
            self._match(TokenType.EQ)
            return self.expression(exp.Property, this=key, value=self._parse_column())

        return None

    def _parse_stored(self) -> exp.FileFormatProperty:
        """Parse STORED AS, including Hive INPUTFORMAT/OUTPUTFORMAT variants."""
        self._match(TokenType.ALIAS)

        input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None
        output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None

        return self.expression(
            exp.FileFormatProperty,
            this=self.expression(
                exp.InputOutputFormat, input_format=input_format, output_format=output_format
            )
            if input_format or output_format
            else self._parse_var_or_string() or self._parse_number() or self._parse_id_var(),
        )

    def _parse_property_assignment(self, exp_class: t.Type[E]) -> E:
        # Accepts both "<prop> = <value>" and "<prop> AS <value>" forms.
        self._match(TokenType.EQ)
        self._match(TokenType.ALIAS)
        return self.expression(exp_class, this=self._parse_field())

    def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]:
        """Collect consecutive properties into one exp.Properties node, or return None."""
        properties = []
        while True:
            if before:
                prop = self._parse_property_before()
            else:
                prop = self._parse_property()

            if not prop:
                break
            for p in ensure_list(prop):
                properties.append(p)

        if properties:
            return self.expression(exp.Properties, expressions=properties)

        return None

    def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty:
        return self.expression(
            exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION")
        )

    def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty:
        # Disambiguate the VOLATILE table property from the stability keyword by
        # inspecting the token two positions back.
        if self._index >= 2:
            pre_volatile_token = self._tokens[self._index - 2]
        else:
            pre_volatile_token = None

        if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS:
            return exp.VolatileProperty()

        return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE"))

    def _parse_with_property(
        self,
    ) -> t.Optional[exp.Expression] | t.List[t.Optional[exp.Expression]]:
        """Dispatch the clause following WITH to the matching property parser."""
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_property)

        if self._match_text_seq("JOURNAL"):
            return self._parse_withjournaltable()

        if self._match_text_seq("DATA"):
            return self._parse_withdata(no=False)
        elif self._match_text_seq("NO", "DATA"):
            return self._parse_withdata(no=True)

        if not self._next:
            return None

        return self._parse_withisolatedloading()

    # https://dev.mysql.com/doc/refman/8.0/en/create-view.html
    def _parse_definer(self) -> t.Optional[exp.DefinerProperty]:
        self._match(TokenType.EQ)

        user = self._parse_id_var()
        self._match(TokenType.PARAMETER)
        host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text)

        if not user or not host:
            return None

        return exp.DefinerProperty(this=f"{user}@{host}")

    def _parse_withjournaltable(self) -> exp.WithJournalTableProperty:
        self._match(TokenType.TABLE)
        self._match(TokenType.EQ)
        return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts())

    def _parse_log(self, no: bool = False) -> exp.LogProperty:
        return self.expression(exp.LogProperty, no=no)

    def _parse_journal(self, **kwargs) -> exp.JournalProperty:
        return self.expression(exp.JournalProperty, **kwargs)

    def _parse_checksum(self) -> exp.ChecksumProperty:
        self._match(TokenType.EQ)

        # Tri-state: True (ON), False (OFF), None when unspecified.
        on = None
        if self._match(TokenType.ON):
            on = True
        elif self._match_text_seq("OFF"):
            on = False

        return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT))

    def _parse_cluster(self) -> exp.Cluster:
        return self.expression(exp.Cluster, expressions=self._parse_csv(self._parse_ordered))

    def _parse_clustered_by(self) -> exp.ClusteredByProperty:
        """Parse Hive-style CLUSTERED BY (...) [SORTED BY (...)] INTO n BUCKETS."""
        self._match_text_seq("BY")

        self._match_l_paren()
        expressions = self._parse_csv(self._parse_column)
        self._match_r_paren()

        if self._match_text_seq("SORTED", "BY"):
            self._match_l_paren()
            sorted_by = self._parse_csv(self._parse_ordered)
            self._match_r_paren()
        else:
            sorted_by = None

        self._match(TokenType.INTO)
        buckets = self._parse_number()
        self._match_text_seq("BUCKETS")

        return self.expression(
            exp.ClusteredByProperty,
            expressions=expressions,
            sorted_by=sorted_by,
            buckets=buckets,
        )

    def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]:
        if not self._match_text_seq("GRANTS"):
            # Not COPY GRANTS: undo the COPY match so other parsers can retry.
            self._retreat(self._index - 1)
            return None

        return self.expression(exp.CopyGrantsProperty)

    def _parse_freespace(self) -> exp.FreespaceProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT)
        )

    def _parse_mergeblockratio(
        self, no: bool = False, default: bool = False
    ) -> exp.MergeBlockRatioProperty:
        if self._match(TokenType.EQ):
            return self.expression(
                exp.MergeBlockRatioProperty,
                this=self._parse_number(),
                percent=self._match(TokenType.PERCENT),
            )

        return self.expression(exp.MergeBlockRatioProperty, no=no, default=default)

    def _parse_datablocksize(
        self,
        default: t.Optional[bool] = None,
        minimum: t.Optional[bool] = None,
        maximum: t.Optional[bool] = None,
    ) -> exp.DataBlocksizeProperty:
        self._match(TokenType.EQ)
        size = self._parse_number()

        units = None
        if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")):
            units = self._prev.text

        return self.expression(
            exp.DataBlocksizeProperty,
            size=size,
            units=units,
            default=default,
            minimum=minimum,
            maximum=maximum,
        )

    def _parse_blockcompression(self) -> exp.BlockCompressionProperty:
        self._match(TokenType.EQ)
        always = self._match_text_seq("ALWAYS")
        manual = self._match_text_seq("MANUAL")
        never = self._match_text_seq("NEVER")
        default = self._match_text_seq("DEFAULT")

        autotemp = None
        if self._match_text_seq("AUTOTEMP"):
            autotemp = self._parse_schema()

        return self.expression(
            exp.BlockCompressionProperty,
            always=always,
            manual=manual,
            never=never,
            default=default,
            autotemp=autotemp,
        )

    def _parse_withisolatedloading(self) -> exp.IsolatedLoadingProperty:
        no = self._match_text_seq("NO")
        concurrent = self._match_text_seq("CONCURRENT")
        self._match_text_seq("ISOLATED", "LOADING")
        for_all = self._match_text_seq("FOR", "ALL")
        for_insert = self._match_text_seq("FOR", "INSERT")
        for_none = self._match_text_seq("FOR", "NONE")
        return self.expression(
            exp.IsolatedLoadingProperty,
            no=no,
            concurrent=concurrent,
            for_all=for_all,
            for_insert=for_insert,
            for_none=for_none,
        )

    def _parse_locking(self) -> exp.LockingProperty:
        """Parse a LOCKING clause: kind, target, FOR/IN, lock type and OVERRIDE."""
        if self._match(TokenType.TABLE):
            kind = "TABLE"
        elif self._match(TokenType.VIEW):
            kind = "VIEW"
        elif self._match(TokenType.ROW):
            kind = "ROW"
        elif self._match_text_seq("DATABASE"):
            kind = "DATABASE"
        else:
            kind = None

        if kind in ("DATABASE", "TABLE", "VIEW"):
            this = self._parse_table_parts()
        else:
            this = None

        if self._match(TokenType.FOR):
            for_or_in = "FOR"
        elif self._match(TokenType.IN):
            for_or_in = "IN"
        else:
            for_or_in = None

        if self._match_text_seq("ACCESS"):
            lock_type = "ACCESS"
        elif self._match_texts(("EXCL", "EXCLUSIVE")):
            lock_type = "EXCLUSIVE"
        elif self._match_text_seq("SHARE"):
            lock_type = "SHARE"
        elif self._match_text_seq("READ"):
            lock_type = "READ"
        elif self._match_text_seq("WRITE"):
            lock_type = "WRITE"
        elif self._match_text_seq("CHECKSUM"):
            lock_type = "CHECKSUM"
        else:
            lock_type = None

        override = self._match_text_seq("OVERRIDE")

        return self.expression(
            exp.LockingProperty,
            this=this,
            kind=kind,
            for_or_in=for_or_in,
            lock_type=lock_type,
            override=override,
        )

    def _parse_partition_by(self) -> t.List[t.Optional[exp.Expression]]:
        if self._match(TokenType.PARTITION_BY):
            return self._parse_csv(self._parse_conjunction)
        return []

    def _parse_partitioned_by(self) -> exp.PartitionedByProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.PartitionedByProperty,
            this=self._parse_schema() or self._parse_bracket(self._parse_field()),
        )

    def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty:
        # Tri-state statistics flag: True / False / None (unspecified).
        if self._match_text_seq("AND", "STATISTICS"):
            statistics = True
        elif self._match_text_seq("AND", "NO", "STATISTICS"):
            statistics = False
        else:
            statistics = None

        return self.expression(exp.WithDataProperty, no=no, statistics=statistics)

    def _parse_no_property(self) -> t.Optional[exp.NoPrimaryIndexProperty]:
        if self._match_text_seq("PRIMARY", "INDEX"):
            return exp.NoPrimaryIndexProperty()
        return None

    def _parse_on_property(self) -> t.Optional[exp.Expression]:
        if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"):
            return exp.OnCommitProperty()
        elif self._match_text_seq("COMMIT", "DELETE", "ROWS"):
            return exp.OnCommitProperty(delete=True)
        return None

    def _parse_distkey(self) -> exp.DistKeyProperty:
        return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var))

    def _parse_create_like(self) -> t.Optional[exp.LikeProperty]:
        table = self._parse_table(schema=True)

        options = []
        while self._match_texts(("INCLUDING", "EXCLUDING")):
            this = self._prev.text.upper()

            id_var = self._parse_id_var()
            if not id_var:
                return None

            options.append(
                self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper()))
            )

        return self.expression(exp.LikeProperty, this=table, expressions=options)

    def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty:
        return self.expression(
            exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound
        )

    def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default
        )

    def _parse_returns(self) -> exp.ReturnsProperty:
        """Parse a RETURNS clause: either RETURNS TABLE [<schema>] or a plain type."""
        value: t.Optional[exp.Expression]
        is_table = self._match(TokenType.TABLE)

        if is_table:
            if self._match(TokenType.LT):
                value = self.expression(
                    exp.Schema,
                    this="TABLE",
                    expressions=self._parse_csv(self._parse_struct_types),
                )
                if not self._match(TokenType.GT):
                    self.raise_error("Expecting >")
            else:
                value = self._parse_schema(exp.var("TABLE"))
        else:
            value = self._parse_types()

        return self.expression(exp.ReturnsProperty, this=value, is_table=is_table)

    def _parse_describe(self) -> exp.Describe:
        kind = self._match_set(self.CREATABLES) and self._prev.text
        this = self._parse_table()
        return self.expression(exp.Describe, this=this, kind=kind)

    def _parse_insert(self) -> exp.Insert:
        """Parse INSERT, including INSERT OVERWRITE DIRECTORY and OR <alternative> forms."""
        comments = ensure_list(self._prev_comments)
        overwrite = self._match(TokenType.OVERWRITE)
        ignore = self._match(TokenType.IGNORE)
        local = self._match_text_seq("LOCAL")
        alternative = None

        if self._match_text_seq("DIRECTORY"):
            this: t.Optional[exp.Expression] = self.expression(
                exp.Directory,
                this=self._parse_var_or_string(),
                local=local,
                row_format=self._parse_row_format(match_row=True),
            )
        else:
            if self._match(TokenType.OR):
                alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text

            self._match(TokenType.INTO)
            comments += ensure_list(self._prev_comments)
            self._match(TokenType.TABLE)
            this = self._parse_table(schema=True)

        returning = self._parse_returning()

        return self.expression(
            exp.Insert,
            comments=comments,
            this=this,
            exists=self._parse_exists(),
            partition=self._parse_partition(),
            where=self._match_pair(TokenType.REPLACE, TokenType.WHERE)
            and self._parse_conjunction(),
            expression=self._parse_ddl_select(),
            conflict=self._parse_on_conflict(),
            returning=returning or self._parse_returning(),
            overwrite=overwrite,
            alternative=alternative,
            ignore=ignore,
        )

    def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]:
        """Parse ON CONFLICT (Postgres) / ON DUPLICATE KEY (MySQL) clauses."""
        conflict = self._match_text_seq("ON", "CONFLICT")
        duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY")

        if not conflict and not duplicate:
            return None

        nothing = None
        expressions = None
        key = None
        constraint = None

        if 
conflict:
            if self._match_text_seq("ON", "CONSTRAINT"):
                constraint = self._parse_id_var()
            else:
                key = self._parse_csv(self._parse_value)

        self._match_text_seq("DO")
        if self._match_text_seq("NOTHING"):
            nothing = True
        else:
            self._match(TokenType.UPDATE)
            self._match(TokenType.SET)
            expressions = self._parse_csv(self._parse_equality)

        return self.expression(
            exp.OnConflict,
            duplicate=duplicate,
            expressions=expressions,
            nothing=nothing,
            key=key,
            constraint=constraint,
        )

    def _parse_returning(self) -> t.Optional[exp.Returning]:
        if not self._match(TokenType.RETURNING):
            return None
        return self.expression(
            exp.Returning,
            expressions=self._parse_csv(self._parse_expression),
            into=self._match(TokenType.INTO) and self._parse_table_part(),
        )

    def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]:
        if not self._match(TokenType.FORMAT):
            return None
        return self._parse_row_format()

    def _parse_row_format(
        self, match_row: bool = False
    ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]:
        """Parse Hive ROW FORMAT SERDE / ROW FORMAT DELIMITED clauses."""
        if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT):
            return None

        if self._match_text_seq("SERDE"):
            this = self._parse_string()

            serde_properties = None
            if self._match(TokenType.SERDE_PROPERTIES):
                serde_properties = self.expression(
                    exp.SerdeProperties, expressions=self._parse_wrapped_csv(self._parse_property)
                )

            return self.expression(
                exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties
            )

        self._match_text_seq("DELIMITED")

        kwargs = {}

        if self._match_text_seq("FIELDS", "TERMINATED", "BY"):
            kwargs["fields"] = self._parse_string()
            if self._match_text_seq("ESCAPED", "BY"):
                kwargs["escaped"] = self._parse_string()
        if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"):
            kwargs["collection_items"] = self._parse_string()
        if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"):
            kwargs["map_keys"] = self._parse_string()
        if self._match_text_seq("LINES", "TERMINATED", "BY"):
            kwargs["lines"] = self._parse_string()
        if self._match_text_seq("NULL", "DEFINED", "AS"):
            kwargs["null"] = self._parse_string()

        return self.expression(exp.RowFormatDelimitedProperty, **kwargs)  # type: ignore

    def _parse_load(self) -> exp.LoadData | exp.Command:
        """Parse Hive LOAD DATA [LOCAL] INPATH ... INTO TABLE ...; otherwise a command."""
        if self._match_text_seq("DATA"):
            local = self._match_text_seq("LOCAL")
            self._match_text_seq("INPATH")
            inpath = self._parse_string()
            overwrite = self._match(TokenType.OVERWRITE)
            self._match_pair(TokenType.INTO, TokenType.TABLE)

            return self.expression(
                exp.LoadData,
                this=self._parse_table(schema=True),
                local=local,
                overwrite=overwrite,
                inpath=inpath,
                partition=self._parse_partition(),
                input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(),
                serde=self._match_text_seq("SERDE") and self._parse_string(),
            )
        return self._parse_as_command(self._prev)

    def _parse_delete(self) -> exp.Delete:
        # This handles MySQL's "Multiple-Table Syntax"
        # https://dev.mysql.com/doc/refman/8.0/en/delete.html
        tables = None
        comments = self._prev_comments
        if not self._match(TokenType.FROM, advance=False):
            tables = self._parse_csv(self._parse_table) or None

        returning = self._parse_returning()

        return self.expression(
            exp.Delete,
            comments=comments,
            tables=tables,
            this=self._match(TokenType.FROM) and self._parse_table(joins=True),
            using=self._match(TokenType.USING) and self._parse_table(joins=True),
            where=self._parse_where(),
            returning=returning or self._parse_returning(),
            limit=self._parse_limit(),
        )

    def _parse_update(self) -> exp.Update:
        comments = self._prev_comments
        this = self._parse_table(alias_tokens=self.UPDATE_ALIAS_TOKENS)
        expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality)
        returning = self._parse_returning()
        return self.expression(
            exp.Update,
            comments=comments,
            **{  # type: ignore
                "this": this,
                "expressions": expressions,
                "from": self._parse_from(joins=True),
                "where": self._parse_where(),
                "returning": returning or self._parse_returning(),
                "limit": self._parse_limit(),
            },
        )

    def _parse_uncache(self) -> exp.Uncache:
        if not self._match(TokenType.TABLE):
            self.raise_error("Expecting TABLE after UNCACHE")

        return self.expression(
            exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True)
        )

    def _parse_cache(self) -> exp.Cache:
        """Parse Spark-style CACHE [LAZY] TABLE ... [OPTIONS (...)] [AS select]."""
        lazy = self._match_text_seq("LAZY")
        self._match(TokenType.TABLE)
        table = self._parse_table(schema=True)

        options = []
        if self._match_text_seq("OPTIONS"):
            self._match_l_paren()
            k = self._parse_string()
            self._match(TokenType.EQ)
            v = self._parse_string()
            options = [k, v]
            self._match_r_paren()

        self._match(TokenType.ALIAS)
        return self.expression(
            exp.Cache,
            this=table,
            lazy=lazy,
            options=options,
            expression=self._parse_select(nested=True),
        )

    def _parse_partition(self) -> t.Optional[exp.Partition]:
        if not self._match(TokenType.PARTITION):
            return None

        return self.expression(
            exp.Partition, expressions=self._parse_wrapped_csv(self._parse_conjunction)
        )

    def _parse_value(self) -> exp.Tuple:
        if self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(self._parse_conjunction)
            self._match_r_paren()
            return self.expression(exp.Tuple, expressions=expressions)

        # In presto we can have VALUES 1, 2 which results in 1 column & 2 rows.
        # https://prestodb.io/docs/current/sql/values.html
        return self.expression(exp.Tuple, expressions=[self._parse_conjunction()])

    def _parse_select(
        self, nested: bool = False, table: bool = False, parse_subquery_alias: bool = True
    ) -> t.Optional[exp.Expression]:
        """Parse a SELECT / VALUES / parenthesized query, optionally headed by a CTE."""
        cte = self._parse_with()
        if cte:
            this = self._parse_statement()

            if not this:
                self.raise_error("Failed to parse any statement following CTE")
                return cte

            if "with" in this.arg_types:
                this.set("with", cte)
            else:
                self.raise_error(f"{this.key} does not support CTE")
                this = cte
        elif self._match(TokenType.SELECT):
            comments = self._prev_comments

            hint = self._parse_hint()
            all_ = self._match(TokenType.ALL)
            distinct = self._match(TokenType.DISTINCT)

            kind = (
                self._match(TokenType.ALIAS)
                and self._match_texts(("STRUCT", "VALUE"))
                and self._prev.text
            )

            if distinct:
                distinct = self.expression(
                    exp.Distinct,
                    on=self._parse_value() if self._match(TokenType.ON) else None,
                )

            if all_ and distinct:
                self.raise_error("Cannot specify both ALL and DISTINCT after SELECT")

            limit = self._parse_limit(top=True)
            expressions = self._parse_expressions()

            this = self.expression(
                exp.Select,
                kind=kind,
                hint=hint,
                distinct=distinct,
                expressions=expressions,
                limit=limit,
            )
            this.comments = comments

            into = self._parse_into()
            if into:
                this.set("into", into)

            from_ = self._parse_from()
            if from_:
                this.set("from", from_)

            this = self._parse_query_modifiers(this)
        elif (table or nested) and self._match(TokenType.L_PAREN):
            if self._match(TokenType.PIVOT):
                this = self._parse_simplified_pivot()
            elif 
self._match(TokenType.FROM):
                this = exp.select("*").from_(
                    t.cast(exp.From, self._parse_from(skip_from_token=True))
                )
            else:
                this = self._parse_table() if table else self._parse_select(nested=True)
                this = self._parse_set_operations(self._parse_query_modifiers(this))

            self._match_r_paren()

            # We return early here so that the UNION isn't attached to the subquery by the
            # following call to _parse_set_operations, but instead becomes the parent node
            return self._parse_subquery(this, parse_alias=parse_subquery_alias)
        elif self._match(TokenType.VALUES):
            this = self.expression(
                exp.Values,
                expressions=self._parse_csv(self._parse_value),
                alias=self._parse_table_alias(),
            )
        else:
            this = None

        return self._parse_set_operations(this)

    def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]:
        """Parse WITH [RECURSIVE] cte [, cte]...; returns None if WITH isn't present."""
        if not skip_with_token and not self._match(TokenType.WITH):
            return None

        comments = self._prev_comments
        recursive = self._match(TokenType.RECURSIVE)

        expressions = []
        while True:
            expressions.append(self._parse_cte())

            if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH):
                break
            else:
                self._match(TokenType.WITH)

        return self.expression(
            exp.With, comments=comments, expressions=expressions, recursive=recursive
        )

    def _parse_cte(self) -> exp.CTE:
        alias = self._parse_table_alias()
        if not alias or not alias.this:
            self.raise_error("Expected CTE to have alias")

        self._match(TokenType.ALIAS)
        return self.expression(
            exp.CTE, this=self._parse_wrapped(self._parse_statement), alias=alias
        )

    def _parse_table_alias(
        self, alias_tokens: t.Optional[t.Collection[TokenType]] = None
    ) -> t.Optional[exp.TableAlias]:
        """Parse [AS] alias [(col, ...)]; returns None when neither part is present."""
        any_token = self._match(TokenType.ALIAS)
        alias = (
            self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
            or self._parse_string_as_identifier()
        )

        index = self._index
        if self._match(TokenType.L_PAREN):
            columns = self._parse_csv(self._parse_function_parameter)
            # A paren that yielded no columns wasn't a column list: rewind past it.
            self._match_r_paren() if columns else self._retreat(index)
        else:
            columns = None

        if not alias and not columns:
            return None

        return self.expression(exp.TableAlias, this=alias, columns=columns)

    def _parse_subquery(
        self, this: t.Optional[exp.Expression], parse_alias: bool = True
    ) -> t.Optional[exp.Subquery]:
        if not this:
            return None

        return self.expression(
            exp.Subquery,
            this=this,
            pivots=self._parse_pivots(),
            alias=self._parse_table_alias() if parse_alias else None,
        )

    def _parse_query_modifiers(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        """Attach joins, laterals and clause modifiers (WHERE, GROUP BY, ...) to `this`."""
        if isinstance(this, self.MODIFIABLES):
            for join in iter(self._parse_join, None):
                this.append("joins", join)
            for lateral in iter(self._parse_lateral, None):
                this.append("laterals", lateral)

            while True:
                if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False):
                    parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type]
                    key, expression = parser(self)

                    if expression:
                        this.set(key, expression)
                        if key == "limit":
                            # A LIMIT x, y form carries the offset inside the limit node;
                            # hoist it into its own OFFSET expression.
                            offset = expression.args.pop("offset", None)
                            if offset:
                                this.set("offset", exp.Offset(expression=offset))
                        continue
                break
        return this

    def _parse_hint(self) -> t.Optional[exp.Hint]:
        if self._match(TokenType.HINT):
            hints = []
            for hint in iter(lambda: self._parse_csv(self._parse_function), []):
                hints.extend(hint)

            if not self._match_pair(TokenType.STAR, TokenType.SLASH):
                self.raise_error("Expected */ after HINT")

            return self.expression(exp.Hint, expressions=hints)

        return None

    def _parse_into(self) -> t.Optional[exp.Into]:
        if not self._match(TokenType.INTO):
            return None

        temp = self._match(TokenType.TEMPORARY)
        unlogged = self._match_text_seq("UNLOGGED")
        self._match(TokenType.TABLE)

        return self.expression(
            exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged
        )

    def _parse_from(
        self, joins: bool = False, skip_from_token: bool = False
    ) -> t.Optional[exp.From]:
        if not skip_from_token and not self._match(TokenType.FROM):
            return None

        return self.expression(
            exp.From, comments=self._prev_comments, this=self._parse_table(joins=joins)
        )

    def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]:
        """Parse a MATCH_RECOGNIZE(...) clause: partition/order/measures/rows/skip/pattern/define."""
        if not self._match(TokenType.MATCH_RECOGNIZE):
            return None

        self._match_l_paren()

        partition = self._parse_partition_by()
        order = self._parse_order()
        measures = self._parse_expressions() if self._match_text_seq("MEASURES") else None

        if self._match_text_seq("ONE", "ROW", "PER", "MATCH"):
            rows = exp.var("ONE ROW PER MATCH")
        elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"):
            text = "ALL ROWS PER MATCH"
            if self._match_text_seq("SHOW", "EMPTY", "MATCHES"):
                text += f" SHOW EMPTY MATCHES"
            elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"):
                text += f" OMIT EMPTY MATCHES"
            elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"):
                text += f" WITH UNMATCHED ROWS"
            rows = exp.var(text)
        else:
            rows = None

        if self._match_text_seq("AFTER", "MATCH", "SKIP"):
            text = "AFTER MATCH SKIP"
            if self._match_text_seq("PAST", "LAST", "ROW"):
                text += f" PAST LAST ROW"
            elif self._match_text_seq("TO", "NEXT", "ROW"):
                text += f" TO NEXT ROW"
            elif self._match_text_seq("TO", "FIRST"):
                text += f" TO FIRST {self._advance_any().text}"  # type: ignore
            elif self._match_text_seq("TO", "LAST"):
                text += f" TO LAST {self._advance_any().text}"  # type: ignore
            after = exp.var(text)
        else:
            after = None

        if self._match_text_seq("PATTERN"):
            self._match_l_paren()

            if not self._curr:
                self.raise_error("Expecting )", self._curr)

            # Capture the raw pattern text up to its matching closing paren.
            paren = 1
            start = self._curr

            while self._curr and paren > 0:
                if self._curr.token_type == TokenType.L_PAREN:
                    paren += 1
                if self._curr.token_type == TokenType.R_PAREN:
                    paren -= 1

                end = self._prev
                self._advance()

            if paren > 0:
                self.raise_error("Expecting )", self._curr)

            pattern = exp.var(self._find_sql(start, end))
        else:
            pattern = None

        define = (
            self._parse_csv(
                lambda: self.expression(
                    exp.Alias,
                    alias=self._parse_id_var(any_token=True),
                    this=self._match(TokenType.ALIAS) and self._parse_conjunction(),
                )
            )
            if self._match_text_seq("DEFINE")
            else None
        )

        self._match_r_paren()

        return self.expression(
            exp.MatchRecognize,
            partition_by=partition,
            order=order,
            measures=measures,
            rows=rows,
            after=after,
            pattern=pattern,
            define=define,
            alias=self._parse_table_alias(),
        )

    def _parse_lateral(self) -> t.Optional[exp.Lateral]:
        outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY)
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY)

        if outer_apply or cross_apply:
            this = self._parse_select(table=True)
            view = None
            outer = not cross_apply
        elif self._match(TokenType.LATERAL):
            this = self._parse_select(table=True)
            view = self._match(TokenType.VIEW)
            outer = self._match(TokenType.OUTER)
        else:
            return None

        if not this:
            this = (
                self._parse_unnest()
                or self._parse_function()
                or self._parse_id_var(any_token=False)
            )
2259 while self._match(TokenType.DOT): 2260 this = exp.Dot( 2261 this=this, 2262 expression=self._parse_function() or self._parse_id_var(any_token=False), 2263 ) 2264 2265 if view: 2266 table = self._parse_id_var(any_token=False) 2267 columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else [] 2268 table_alias: t.Optional[exp.TableAlias] = self.expression( 2269 exp.TableAlias, this=table, columns=columns 2270 ) 2271 elif isinstance(this, exp.Subquery) and this.alias: 2272 # Ensures parity between the Subquery's and the Lateral's "alias" args 2273 table_alias = this.args["alias"].copy() 2274 else: 2275 table_alias = self._parse_table_alias() 2276 2277 return self.expression(exp.Lateral, this=this, view=view, outer=outer, alias=table_alias) 2278 2279 def _parse_join_parts( 2280 self, 2281 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 2282 return ( 2283 self._match_set(self.JOIN_METHODS) and self._prev, 2284 self._match_set(self.JOIN_SIDES) and self._prev, 2285 self._match_set(self.JOIN_KINDS) and self._prev, 2286 ) 2287 2288 def _parse_join( 2289 self, skip_join_token: bool = False, parse_bracket: bool = False 2290 ) -> t.Optional[exp.Join]: 2291 if self._match(TokenType.COMMA): 2292 return self.expression(exp.Join, this=self._parse_table()) 2293 2294 index = self._index 2295 method, side, kind = self._parse_join_parts() 2296 hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None 2297 join = self._match(TokenType.JOIN) 2298 2299 if not skip_join_token and not join: 2300 self._retreat(index) 2301 kind = None 2302 method = None 2303 side = None 2304 2305 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False) 2306 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False) 2307 2308 if not skip_join_token and not join and not outer_apply and not cross_apply: 2309 return None 2310 2311 if outer_apply: 2312 side = Token(TokenType.LEFT, "LEFT") 2313 2314 kwargs: t.Dict[str, 
t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)} 2315 2316 if method: 2317 kwargs["method"] = method.text 2318 if side: 2319 kwargs["side"] = side.text 2320 if kind: 2321 kwargs["kind"] = kind.text 2322 if hint: 2323 kwargs["hint"] = hint 2324 2325 if self._match(TokenType.ON): 2326 kwargs["on"] = self._parse_conjunction() 2327 elif self._match(TokenType.USING): 2328 kwargs["using"] = self._parse_wrapped_id_vars() 2329 elif not (kind and kind.token_type == TokenType.CROSS): 2330 index = self._index 2331 joins = self._parse_joins() 2332 2333 if joins and self._match(TokenType.ON): 2334 kwargs["on"] = self._parse_conjunction() 2335 elif joins and self._match(TokenType.USING): 2336 kwargs["using"] = self._parse_wrapped_id_vars() 2337 else: 2338 joins = None 2339 self._retreat(index) 2340 2341 kwargs["this"].set("joins", joins) 2342 2343 return self.expression(exp.Join, **kwargs) 2344 2345 def _parse_index( 2346 self, 2347 index: t.Optional[exp.Expression] = None, 2348 ) -> t.Optional[exp.Index]: 2349 if index: 2350 unique = None 2351 primary = None 2352 amp = None 2353 2354 self._match(TokenType.ON) 2355 self._match(TokenType.TABLE) # hive 2356 table = self._parse_table_parts(schema=True) 2357 else: 2358 unique = self._match(TokenType.UNIQUE) 2359 primary = self._match_text_seq("PRIMARY") 2360 amp = self._match_text_seq("AMP") 2361 2362 if not self._match(TokenType.INDEX): 2363 return None 2364 2365 index = self._parse_id_var() 2366 table = None 2367 2368 using = self._parse_field() if self._match(TokenType.USING) else None 2369 2370 if self._match(TokenType.L_PAREN, advance=False): 2371 columns = self._parse_wrapped_csv(self._parse_ordered) 2372 else: 2373 columns = None 2374 2375 return self.expression( 2376 exp.Index, 2377 this=index, 2378 table=table, 2379 using=using, 2380 columns=columns, 2381 unique=unique, 2382 primary=primary, 2383 amp=amp, 2384 partition_by=self._parse_partition_by(), 2385 ) 2386 2387 def _parse_table_hints(self) -> 
t.Optional[t.List[exp.Expression]]: 2388 hints: t.List[exp.Expression] = [] 2389 if self._match_pair(TokenType.WITH, TokenType.L_PAREN): 2390 # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16 2391 hints.append( 2392 self.expression( 2393 exp.WithTableHint, 2394 expressions=self._parse_csv( 2395 lambda: self._parse_function() or self._parse_var(any_token=True) 2396 ), 2397 ) 2398 ) 2399 self._match_r_paren() 2400 else: 2401 # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html 2402 while self._match_set(self.TABLE_INDEX_HINT_TOKENS): 2403 hint = exp.IndexTableHint(this=self._prev.text.upper()) 2404 2405 self._match_texts({"INDEX", "KEY"}) 2406 if self._match(TokenType.FOR): 2407 hint.set("target", self._advance_any() and self._prev.text.upper()) 2408 2409 hint.set("expressions", self._parse_wrapped_id_vars()) 2410 hints.append(hint) 2411 2412 return hints or None 2413 2414 def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]: 2415 return ( 2416 (not schema and self._parse_function(optional_parens=False)) 2417 or self._parse_id_var(any_token=False) 2418 or self._parse_string_as_identifier() 2419 or self._parse_placeholder() 2420 ) 2421 2422 def _parse_table_parts(self, schema: bool = False) -> exp.Table: 2423 catalog = None 2424 db = None 2425 table = self._parse_table_part(schema=schema) 2426 2427 while self._match(TokenType.DOT): 2428 if catalog: 2429 # This allows nesting the table in arbitrarily many dot expressions if needed 2430 table = self.expression( 2431 exp.Dot, this=table, expression=self._parse_table_part(schema=schema) 2432 ) 2433 else: 2434 catalog = db 2435 db = table 2436 table = self._parse_table_part(schema=schema) 2437 2438 if not table: 2439 self.raise_error(f"Expected table name but got {self._curr}") 2440 2441 return self.expression( 2442 exp.Table, this=table, db=db, catalog=catalog, pivots=self._parse_pivots() 2443 ) 2444 2445 def _parse_table( 2446 self, 2447 
schema: bool = False, 2448 joins: bool = False, 2449 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 2450 parse_bracket: bool = False, 2451 ) -> t.Optional[exp.Expression]: 2452 lateral = self._parse_lateral() 2453 if lateral: 2454 return lateral 2455 2456 unnest = self._parse_unnest() 2457 if unnest: 2458 return unnest 2459 2460 values = self._parse_derived_table_values() 2461 if values: 2462 return values 2463 2464 subquery = self._parse_select(table=True) 2465 if subquery: 2466 if not subquery.args.get("pivots"): 2467 subquery.set("pivots", self._parse_pivots()) 2468 return subquery 2469 2470 bracket = parse_bracket and self._parse_bracket(None) 2471 bracket = self.expression(exp.Table, this=bracket) if bracket else None 2472 this: exp.Expression = bracket or self._parse_table_parts(schema=schema) 2473 2474 if schema: 2475 return self._parse_schema(this=this) 2476 2477 if self.ALIAS_POST_TABLESAMPLE: 2478 table_sample = self._parse_table_sample() 2479 2480 alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 2481 if alias: 2482 this.set("alias", alias) 2483 2484 if not this.args.get("pivots"): 2485 this.set("pivots", self._parse_pivots()) 2486 2487 this.set("hints", self._parse_table_hints()) 2488 2489 if not self.ALIAS_POST_TABLESAMPLE: 2490 table_sample = self._parse_table_sample() 2491 2492 if table_sample: 2493 table_sample.set("this", this) 2494 this = table_sample 2495 2496 if joins: 2497 for join in iter(self._parse_join, None): 2498 this.append("joins", join) 2499 2500 return this 2501 2502 def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]: 2503 if not self._match(TokenType.UNNEST): 2504 return None 2505 2506 expressions = self._parse_wrapped_csv(self._parse_type) 2507 ordinality = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 2508 2509 alias = self._parse_table_alias() if with_alias else None 2510 2511 if alias and self.UNNEST_COLUMN_ONLY: 2512 if alias.args.get("columns"): 
2513 self.raise_error("Unexpected extra column alias in unnest.") 2514 2515 alias.set("columns", [alias.this]) 2516 alias.set("this", None) 2517 2518 offset = None 2519 if self._match_pair(TokenType.WITH, TokenType.OFFSET): 2520 self._match(TokenType.ALIAS) 2521 offset = self._parse_id_var() or exp.to_identifier("offset") 2522 2523 return self.expression( 2524 exp.Unnest, expressions=expressions, ordinality=ordinality, alias=alias, offset=offset 2525 ) 2526 2527 def _parse_derived_table_values(self) -> t.Optional[exp.Values]: 2528 is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES) 2529 if not is_derived and not self._match(TokenType.VALUES): 2530 return None 2531 2532 expressions = self._parse_csv(self._parse_value) 2533 alias = self._parse_table_alias() 2534 2535 if is_derived: 2536 self._match_r_paren() 2537 2538 return self.expression( 2539 exp.Values, expressions=expressions, alias=alias or self._parse_table_alias() 2540 ) 2541 2542 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 2543 if not self._match(TokenType.TABLE_SAMPLE) and not ( 2544 as_modifier and self._match_text_seq("USING", "SAMPLE") 2545 ): 2546 return None 2547 2548 bucket_numerator = None 2549 bucket_denominator = None 2550 bucket_field = None 2551 percent = None 2552 rows = None 2553 size = None 2554 seed = None 2555 2556 kind = ( 2557 self._prev.text if self._prev.token_type == TokenType.TABLE_SAMPLE else "USING SAMPLE" 2558 ) 2559 method = self._parse_var(tokens=(TokenType.ROW,)) 2560 2561 self._match(TokenType.L_PAREN) 2562 2563 num = self._parse_number() 2564 2565 if self._match_text_seq("BUCKET"): 2566 bucket_numerator = self._parse_number() 2567 self._match_text_seq("OUT", "OF") 2568 bucket_denominator = bucket_denominator = self._parse_number() 2569 self._match(TokenType.ON) 2570 bucket_field = self._parse_field() 2571 elif self._match_set((TokenType.PERCENT, TokenType.MOD)): 2572 percent = num 2573 elif 
self._match(TokenType.ROWS): 2574 rows = num 2575 else: 2576 size = num 2577 2578 self._match(TokenType.R_PAREN) 2579 2580 if self._match(TokenType.L_PAREN): 2581 method = self._parse_var() 2582 seed = self._match(TokenType.COMMA) and self._parse_number() 2583 self._match_r_paren() 2584 elif self._match_texts(("SEED", "REPEATABLE")): 2585 seed = self._parse_wrapped(self._parse_number) 2586 2587 return self.expression( 2588 exp.TableSample, 2589 method=method, 2590 bucket_numerator=bucket_numerator, 2591 bucket_denominator=bucket_denominator, 2592 bucket_field=bucket_field, 2593 percent=percent, 2594 rows=rows, 2595 size=size, 2596 seed=seed, 2597 kind=kind, 2598 ) 2599 2600 def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]: 2601 return list(iter(self._parse_pivot, None)) or None 2602 2603 def _parse_joins(self) -> t.Optional[t.List[exp.Join]]: 2604 return list(iter(self._parse_join, None)) or None 2605 2606 # https://duckdb.org/docs/sql/statements/pivot 2607 def _parse_simplified_pivot(self) -> exp.Pivot: 2608 def _parse_on() -> t.Optional[exp.Expression]: 2609 this = self._parse_bitwise() 2610 return self._parse_in(this) if self._match(TokenType.IN) else this 2611 2612 this = self._parse_table() 2613 expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on) 2614 using = self._match(TokenType.USING) and self._parse_csv( 2615 lambda: self._parse_alias(self._parse_function()) 2616 ) 2617 group = self._parse_group() 2618 return self.expression( 2619 exp.Pivot, this=this, expressions=expressions, using=using, group=group 2620 ) 2621 2622 def _parse_pivot(self) -> t.Optional[exp.Pivot]: 2623 index = self._index 2624 2625 if self._match(TokenType.PIVOT): 2626 unpivot = False 2627 elif self._match(TokenType.UNPIVOT): 2628 unpivot = True 2629 else: 2630 return None 2631 2632 expressions = [] 2633 field = None 2634 2635 if not self._match(TokenType.L_PAREN): 2636 self._retreat(index) 2637 return None 2638 2639 if unpivot: 2640 expressions = 
self._parse_csv(self._parse_column) 2641 else: 2642 expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function())) 2643 2644 if not expressions: 2645 self.raise_error("Failed to parse PIVOT's aggregation list") 2646 2647 if not self._match(TokenType.FOR): 2648 self.raise_error("Expecting FOR") 2649 2650 value = self._parse_column() 2651 2652 if not self._match(TokenType.IN): 2653 self.raise_error("Expecting IN") 2654 2655 field = self._parse_in(value, alias=True) 2656 2657 self._match_r_paren() 2658 2659 pivot = self.expression(exp.Pivot, expressions=expressions, field=field, unpivot=unpivot) 2660 2661 if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False): 2662 pivot.set("alias", self._parse_table_alias()) 2663 2664 if not unpivot: 2665 names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions)) 2666 2667 columns: t.List[exp.Expression] = [] 2668 for fld in pivot.args["field"].expressions: 2669 field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name 2670 for name in names: 2671 if self.PREFIXED_PIVOT_COLUMNS: 2672 name = f"{name}_{field_name}" if name else field_name 2673 else: 2674 name = f"{field_name}_{name}" if name else field_name 2675 2676 columns.append(exp.to_identifier(name)) 2677 2678 pivot.set("columns", columns) 2679 2680 return pivot 2681 2682 def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]: 2683 return [agg.alias for agg in aggregations] 2684 2685 def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]: 2686 if not skip_where_token and not self._match(TokenType.WHERE): 2687 return None 2688 2689 return self.expression( 2690 exp.Where, comments=self._prev_comments, this=self._parse_conjunction() 2691 ) 2692 2693 def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]: 2694 if not skip_group_by_token and not self._match(TokenType.GROUP_BY): 2695 return None 2696 2697 elements = 
defaultdict(list) 2698 2699 if self._match(TokenType.ALL): 2700 return self.expression(exp.Group, all=True) 2701 2702 while True: 2703 expressions = self._parse_csv(self._parse_conjunction) 2704 if expressions: 2705 elements["expressions"].extend(expressions) 2706 2707 grouping_sets = self._parse_grouping_sets() 2708 if grouping_sets: 2709 elements["grouping_sets"].extend(grouping_sets) 2710 2711 rollup = None 2712 cube = None 2713 totals = None 2714 2715 with_ = self._match(TokenType.WITH) 2716 if self._match(TokenType.ROLLUP): 2717 rollup = with_ or self._parse_wrapped_csv(self._parse_column) 2718 elements["rollup"].extend(ensure_list(rollup)) 2719 2720 if self._match(TokenType.CUBE): 2721 cube = with_ or self._parse_wrapped_csv(self._parse_column) 2722 elements["cube"].extend(ensure_list(cube)) 2723 2724 if self._match_text_seq("TOTALS"): 2725 totals = True 2726 elements["totals"] = True # type: ignore 2727 2728 if not (grouping_sets or rollup or cube or totals): 2729 break 2730 2731 return self.expression(exp.Group, **elements) # type: ignore 2732 2733 def _parse_grouping_sets(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]: 2734 if not self._match(TokenType.GROUPING_SETS): 2735 return None 2736 2737 return self._parse_wrapped_csv(self._parse_grouping_set) 2738 2739 def _parse_grouping_set(self) -> t.Optional[exp.Expression]: 2740 if self._match(TokenType.L_PAREN): 2741 grouping_set = self._parse_csv(self._parse_column) 2742 self._match_r_paren() 2743 return self.expression(exp.Tuple, expressions=grouping_set) 2744 2745 return self._parse_column() 2746 2747 def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]: 2748 if not skip_having_token and not self._match(TokenType.HAVING): 2749 return None 2750 return self.expression(exp.Having, this=self._parse_conjunction()) 2751 2752 def _parse_qualify(self) -> t.Optional[exp.Qualify]: 2753 if not self._match(TokenType.QUALIFY): 2754 return None 2755 return 
self.expression(exp.Qualify, this=self._parse_conjunction()) 2756 2757 def _parse_order( 2758 self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False 2759 ) -> t.Optional[exp.Expression]: 2760 if not skip_order_token and not self._match(TokenType.ORDER_BY): 2761 return this 2762 2763 return self.expression( 2764 exp.Order, this=this, expressions=self._parse_csv(self._parse_ordered) 2765 ) 2766 2767 def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]: 2768 if not self._match(token): 2769 return None 2770 return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered)) 2771 2772 def _parse_ordered(self) -> exp.Ordered: 2773 this = self._parse_conjunction() 2774 self._match(TokenType.ASC) 2775 2776 is_desc = self._match(TokenType.DESC) 2777 is_nulls_first = self._match_text_seq("NULLS", "FIRST") 2778 is_nulls_last = self._match_text_seq("NULLS", "LAST") 2779 desc = is_desc or False 2780 asc = not desc 2781 nulls_first = is_nulls_first or False 2782 explicitly_null_ordered = is_nulls_first or is_nulls_last 2783 2784 if ( 2785 not explicitly_null_ordered 2786 and ( 2787 (asc and self.NULL_ORDERING == "nulls_are_small") 2788 or (desc and self.NULL_ORDERING != "nulls_are_small") 2789 ) 2790 and self.NULL_ORDERING != "nulls_are_last" 2791 ): 2792 nulls_first = True 2793 2794 return self.expression(exp.Ordered, this=this, desc=desc, nulls_first=nulls_first) 2795 2796 def _parse_limit( 2797 self, this: t.Optional[exp.Expression] = None, top: bool = False 2798 ) -> t.Optional[exp.Expression]: 2799 if self._match(TokenType.TOP if top else TokenType.LIMIT): 2800 comments = self._prev_comments 2801 if top: 2802 limit_paren = self._match(TokenType.L_PAREN) 2803 expression = self._parse_number() 2804 2805 if limit_paren: 2806 self._match_r_paren() 2807 else: 2808 expression = self._parse_term() 2809 2810 if self._match(TokenType.COMMA): 2811 offset = expression 2812 expression = self._parse_term() 2813 else: 2814 
offset = None 2815 2816 limit_exp = self.expression( 2817 exp.Limit, this=this, expression=expression, offset=offset, comments=comments 2818 ) 2819 2820 return limit_exp 2821 2822 if self._match(TokenType.FETCH): 2823 direction = self._match_set((TokenType.FIRST, TokenType.NEXT)) 2824 direction = self._prev.text if direction else "FIRST" 2825 2826 count = self._parse_number() 2827 percent = self._match(TokenType.PERCENT) 2828 2829 self._match_set((TokenType.ROW, TokenType.ROWS)) 2830 2831 only = self._match_text_seq("ONLY") 2832 with_ties = self._match_text_seq("WITH", "TIES") 2833 2834 if only and with_ties: 2835 self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause") 2836 2837 return self.expression( 2838 exp.Fetch, 2839 direction=direction, 2840 count=count, 2841 percent=percent, 2842 with_ties=with_ties, 2843 ) 2844 2845 return this 2846 2847 def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 2848 if not self._match(TokenType.OFFSET): 2849 return this 2850 2851 count = self._parse_term() 2852 self._match_set((TokenType.ROW, TokenType.ROWS)) 2853 return self.expression(exp.Offset, this=this, expression=count) 2854 2855 def _parse_locks(self) -> t.List[exp.Lock]: 2856 locks = [] 2857 while True: 2858 if self._match_text_seq("FOR", "UPDATE"): 2859 update = True 2860 elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq( 2861 "LOCK", "IN", "SHARE", "MODE" 2862 ): 2863 update = False 2864 else: 2865 break 2866 2867 expressions = None 2868 if self._match_text_seq("OF"): 2869 expressions = self._parse_csv(lambda: self._parse_table(schema=True)) 2870 2871 wait: t.Optional[bool | exp.Expression] = None 2872 if self._match_text_seq("NOWAIT"): 2873 wait = True 2874 elif self._match_text_seq("WAIT"): 2875 wait = self._parse_primary() 2876 elif self._match_text_seq("SKIP", "LOCKED"): 2877 wait = False 2878 2879 locks.append( 2880 self.expression(exp.Lock, update=update, expressions=expressions, 
wait=wait) 2881 ) 2882 2883 return locks 2884 2885 def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 2886 if not self._match_set(self.SET_OPERATIONS): 2887 return this 2888 2889 token_type = self._prev.token_type 2890 2891 if token_type == TokenType.UNION: 2892 expression = exp.Union 2893 elif token_type == TokenType.EXCEPT: 2894 expression = exp.Except 2895 else: 2896 expression = exp.Intersect 2897 2898 return self.expression( 2899 expression, 2900 this=this, 2901 distinct=self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL), 2902 expression=self._parse_set_operations(self._parse_select(nested=True)), 2903 ) 2904 2905 def _parse_expression(self) -> t.Optional[exp.Expression]: 2906 return self._parse_alias(self._parse_conjunction()) 2907 2908 def _parse_conjunction(self) -> t.Optional[exp.Expression]: 2909 return self._parse_tokens(self._parse_equality, self.CONJUNCTION) 2910 2911 def _parse_equality(self) -> t.Optional[exp.Expression]: 2912 return self._parse_tokens(self._parse_comparison, self.EQUALITY) 2913 2914 def _parse_comparison(self) -> t.Optional[exp.Expression]: 2915 return self._parse_tokens(self._parse_range, self.COMPARISON) 2916 2917 def _parse_range(self) -> t.Optional[exp.Expression]: 2918 this = self._parse_bitwise() 2919 negate = self._match(TokenType.NOT) 2920 2921 if self._match_set(self.RANGE_PARSERS): 2922 expression = self.RANGE_PARSERS[self._prev.token_type](self, this) 2923 if not expression: 2924 return this 2925 2926 this = expression 2927 elif self._match(TokenType.ISNULL): 2928 this = self.expression(exp.Is, this=this, expression=exp.Null()) 2929 2930 # Postgres supports ISNULL and NOTNULL for conditions. 
2931 # https://blog.andreiavram.ro/postgresql-null-composite-type/ 2932 if self._match(TokenType.NOTNULL): 2933 this = self.expression(exp.Is, this=this, expression=exp.Null()) 2934 this = self.expression(exp.Not, this=this) 2935 2936 if negate: 2937 this = self.expression(exp.Not, this=this) 2938 2939 if self._match(TokenType.IS): 2940 this = self._parse_is(this) 2941 2942 return this 2943 2944 def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 2945 index = self._index - 1 2946 negate = self._match(TokenType.NOT) 2947 2948 if self._match_text_seq("DISTINCT", "FROM"): 2949 klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ 2950 return self.expression(klass, this=this, expression=self._parse_expression()) 2951 2952 expression = self._parse_null() or self._parse_boolean() 2953 if not expression: 2954 self._retreat(index) 2955 return None 2956 2957 this = self.expression(exp.Is, this=this, expression=expression) 2958 return self.expression(exp.Not, this=this) if negate else this 2959 2960 def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In: 2961 unnest = self._parse_unnest(with_alias=False) 2962 if unnest: 2963 this = self.expression(exp.In, this=this, unnest=unnest) 2964 elif self._match(TokenType.L_PAREN): 2965 expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias)) 2966 2967 if len(expressions) == 1 and isinstance(expressions[0], exp.Subqueryable): 2968 this = self.expression(exp.In, this=this, query=expressions[0]) 2969 else: 2970 this = self.expression(exp.In, this=this, expressions=expressions) 2971 2972 self._match_r_paren(this) 2973 else: 2974 this = self.expression(exp.In, this=this, field=self._parse_field()) 2975 2976 return this 2977 2978 def _parse_between(self, this: exp.Expression) -> exp.Between: 2979 low = self._parse_bitwise() 2980 self._match(TokenType.AND) 2981 high = self._parse_bitwise() 2982 return self.expression(exp.Between, this=this, low=low, 
high=high) 2983 2984 def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 2985 if not self._match(TokenType.ESCAPE): 2986 return this 2987 return self.expression(exp.Escape, this=this, expression=self._parse_string()) 2988 2989 def _parse_interval(self) -> t.Optional[exp.Interval]: 2990 if not self._match(TokenType.INTERVAL): 2991 return None 2992 2993 if self._match(TokenType.STRING, advance=False): 2994 this = self._parse_primary() 2995 else: 2996 this = self._parse_term() 2997 2998 unit = self._parse_function() or self._parse_var() 2999 3000 # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse 3001 # each INTERVAL expression into this canonical form so it's easy to transpile 3002 if this and this.is_number: 3003 this = exp.Literal.string(this.name) 3004 elif this and this.is_string: 3005 parts = this.name.split() 3006 3007 if len(parts) == 2: 3008 if unit: 3009 # this is not actually a unit, it's something else 3010 unit = None 3011 self._retreat(self._index - 1) 3012 else: 3013 this = exp.Literal.string(parts[0]) 3014 unit = self.expression(exp.Var, this=parts[1]) 3015 3016 return self.expression(exp.Interval, this=this, unit=unit) 3017 3018 def _parse_bitwise(self) -> t.Optional[exp.Expression]: 3019 this = self._parse_term() 3020 3021 while True: 3022 if self._match_set(self.BITWISE): 3023 this = self.expression( 3024 self.BITWISE[self._prev.token_type], this=this, expression=self._parse_term() 3025 ) 3026 elif self._match_pair(TokenType.LT, TokenType.LT): 3027 this = self.expression( 3028 exp.BitwiseLeftShift, this=this, expression=self._parse_term() 3029 ) 3030 elif self._match_pair(TokenType.GT, TokenType.GT): 3031 this = self.expression( 3032 exp.BitwiseRightShift, this=this, expression=self._parse_term() 3033 ) 3034 else: 3035 break 3036 3037 return this 3038 3039 def _parse_term(self) -> t.Optional[exp.Expression]: 3040 return self._parse_tokens(self._parse_factor, self.TERM) 3041 3042 def 
_parse_factor(self) -> t.Optional[exp.Expression]: 3043 return self._parse_tokens(self._parse_unary, self.FACTOR) 3044 3045 def _parse_unary(self) -> t.Optional[exp.Expression]: 3046 if self._match_set(self.UNARY_PARSERS): 3047 return self.UNARY_PARSERS[self._prev.token_type](self) 3048 return self._parse_at_time_zone(self._parse_type()) 3049 3050 def _parse_type(self) -> t.Optional[exp.Expression]: 3051 interval = self._parse_interval() 3052 if interval: 3053 return interval 3054 3055 index = self._index 3056 data_type = self._parse_types(check_func=True) 3057 this = self._parse_column() 3058 3059 if data_type: 3060 if isinstance(this, exp.Literal): 3061 parser = self.TYPE_LITERAL_PARSERS.get(data_type.this) 3062 if parser: 3063 return parser(self, this, data_type) 3064 return self.expression(exp.Cast, this=this, to=data_type) 3065 if not data_type.expressions: 3066 self._retreat(index) 3067 return self._parse_column() 3068 return self._parse_column_ops(data_type) 3069 3070 return this 3071 3072 def _parse_type_size(self) -> t.Optional[exp.DataTypeSize]: 3073 this = self._parse_type() 3074 if not this: 3075 return None 3076 3077 return self.expression( 3078 exp.DataTypeSize, this=this, expression=self._parse_var(any_token=True) 3079 ) 3080 3081 def _parse_types( 3082 self, check_func: bool = False, schema: bool = False 3083 ) -> t.Optional[exp.Expression]: 3084 index = self._index 3085 3086 prefix = self._match_text_seq("SYSUDTLIB", ".") 3087 3088 if not self._match_set(self.TYPE_TOKENS): 3089 return None 3090 3091 type_token = self._prev.token_type 3092 3093 if type_token == TokenType.PSEUDO_TYPE: 3094 return self.expression(exp.PseudoType, this=self._prev.text) 3095 3096 nested = type_token in self.NESTED_TYPE_TOKENS 3097 is_struct = type_token == TokenType.STRUCT 3098 expressions = None 3099 maybe_func = False 3100 3101 if self._match(TokenType.L_PAREN): 3102 if is_struct: 3103 expressions = self._parse_csv(self._parse_struct_types) 3104 elif nested: 3105 
expressions = self._parse_csv( 3106 lambda: self._parse_types(check_func=check_func, schema=schema) 3107 ) 3108 elif type_token in self.ENUM_TYPE_TOKENS: 3109 expressions = self._parse_csv(self._parse_primary) 3110 else: 3111 expressions = self._parse_csv(self._parse_type_size) 3112 3113 if not expressions or not self._match(TokenType.R_PAREN): 3114 self._retreat(index) 3115 return None 3116 3117 maybe_func = True 3118 3119 if self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET): 3120 this = exp.DataType( 3121 this=exp.DataType.Type.ARRAY, 3122 expressions=[ 3123 exp.DataType( 3124 this=exp.DataType.Type[type_token.value], 3125 expressions=expressions, 3126 nested=nested, 3127 ) 3128 ], 3129 nested=True, 3130 ) 3131 3132 while self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET): 3133 this = exp.DataType(this=exp.DataType.Type.ARRAY, expressions=[this], nested=True) 3134 3135 return this 3136 3137 if self._match(TokenType.L_BRACKET): 3138 self._retreat(index) 3139 return None 3140 3141 values: t.Optional[t.List[t.Optional[exp.Expression]]] = None 3142 if nested and self._match(TokenType.LT): 3143 if is_struct: 3144 expressions = self._parse_csv(self._parse_struct_types) 3145 else: 3146 expressions = self._parse_csv( 3147 lambda: self._parse_types(check_func=check_func, schema=schema) 3148 ) 3149 3150 if not self._match(TokenType.GT): 3151 self.raise_error("Expecting >") 3152 3153 if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)): 3154 values = self._parse_csv(self._parse_conjunction) 3155 self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN)) 3156 3157 value: t.Optional[exp.Expression] = None 3158 if type_token in self.TIMESTAMPS: 3159 if self._match_text_seq("WITH", "TIME", "ZONE"): 3160 maybe_func = False 3161 value = exp.DataType(this=exp.DataType.Type.TIMESTAMPTZ, expressions=expressions) 3162 elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"): 3163 maybe_func = False 3164 value = 
exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions)
            elif self._match_text_seq("WITHOUT", "TIME", "ZONE"):
                # An explicit WITHOUT TIME ZONE rules out a same-named function call.
                maybe_func = False
        elif type_token == TokenType.INTERVAL:
            unit = self._parse_var()

            if not unit:
                value = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL)
            else:
                value = self.expression(exp.Interval, unit=unit)

        if maybe_func and check_func:
            # A type name followed by a string literal (e.g. DATE '2020-01-01') is a
            # cast, not a function call; peek ahead without consuming the string.
            index2 = self._index
            peek = self._parse_string()

            if not peek:
                # No string follows, so this wasn't a type after all — rewind fully.
                self._retreat(index)
                return None

            self._retreat(index2)

        if value:
            return value

        return exp.DataType(
            this=exp.DataType.Type[type_token.value],
            expressions=expressions,
            nested=nested,
            values=values,
            prefix=prefix,
        )

    def _parse_struct_types(self) -> t.Optional[exp.Expression]:
        """Parse a single STRUCT member: an (optionally typed) name, an optional
        colon separator, then its column definition."""
        this = self._parse_type() or self._parse_id_var()
        self._match(TokenType.COLON)
        return self._parse_column_def(this)

    def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Wrap `this` in an AtTimeZone node if an AT TIME ZONE clause follows."""
        if not self._match_text_seq("AT", "TIME", "ZONE"):
            return this
        return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary())

    def _parse_column(self) -> t.Optional[exp.Expression]:
        """Parse a column reference, promoting a bare identifier to a Column node."""
        this = self._parse_field()
        if isinstance(this, exp.Identifier):
            this = self.expression(exp.Column, this=this)
        elif not this:
            return self._parse_bracket(this)
        return self._parse_column_ops(this)

    def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Apply trailing column operators (dots, ::casts, JSON arrows, brackets)
        to `this`, folding each one into the accumulated expression."""
        this = self._parse_bracket(this)

        while self._match_set(self.COLUMN_OPERATORS):
            op_token = self._prev.token_type
            op = self.COLUMN_OPERATORS.get(op_token)

            if op_token == TokenType.DCOLON:
                # Postgres-style cast: the right-hand side must be a type.
                field = self._parse_types()
                if not field:
                    self.raise_error("Expected type")
            elif op and self._curr:
                # Custom operator (e.g. -> / ->>): consume one token as its argument.
                self._advance()
                value = self._prev.text
                field = (
                    exp.Literal.number(value)
                    if self._prev.token_type == TokenType.NUMBER
                    else exp.Literal.string(value)
                )
            else:
                field = self._parse_field(anonymous_func=True, any_token=True)

            if isinstance(field, exp.Func):
                # bigquery allows function calls like x.y.count(...)
                # SAFE.SUBSTR(...)
                # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules
                this = self._replace_columns_with_dots(this)

            if op:
                this = op(self, this, field)
            elif isinstance(this, exp.Column) and not this.args.get("catalog"):
                # Shift parts left: what we thought was the column is actually a
                # table/db qualifier of the newly parsed field.
                this = self.expression(
                    exp.Column,
                    this=field,
                    table=this.this,
                    db=this.args.get("table"),
                    catalog=this.args.get("db"),
                )
            else:
                this = self.expression(exp.Dot, this=this, expression=field)
            this = self._parse_bracket(this)
        return this

    def _parse_primary(self) -> t.Optional[exp.Expression]:
        """Parse a primary expression: a literal, adjacent-string concatenation,
        a leading-dot number (.5), or a parenthesized expression / subquery."""
        if self._match_set(self.PRIMARY_PARSERS):
            token_type = self._prev.token_type
            primary = self.PRIMARY_PARSERS[token_type](self, self._prev)

            if token_type == TokenType.STRING:
                # SQL spec: adjacent string literals concatenate ('a' 'b' -> 'ab').
                expressions = [primary]
                while self._match(TokenType.STRING):
                    expressions.append(exp.Literal.string(self._prev.text))

                if len(expressions) > 1:
                    return self.expression(exp.Concat, expressions=expressions)

            return primary

        if self._match_pair(TokenType.DOT, TokenType.NUMBER):
            return exp.Literal.number(f"0.{self._prev.text}")

        if self._match(TokenType.L_PAREN):
            comments = self._prev_comments
            query = self._parse_select()

            if query:
                expressions = [query]
            else:
                expressions = self._parse_expressions()

            this = self._parse_query_modifiers(seq_get(expressions, 0))

            if isinstance(this, exp.Subqueryable):
                this = self._parse_set_operations(
                    self._parse_subquery(this=this, parse_alias=False)
                )
            elif len(expressions) > 1:
                this = self.expression(exp.Tuple, expressions=expressions)
            else:
                this = self.expression(exp.Paren, this=self._parse_set_operations(this))

            if this:
                this.add_comments(comments)

            self._match_r_paren(expression=this)
            return this

        return None

    def _parse_field(
        self,
        any_token: bool = False,
        tokens: t.Optional[t.Collection[TokenType]] = None,
        anonymous_func: bool = False,
    ) -> t.Optional[exp.Expression]:
        """Parse a field: a primary expression, a function call, or an identifier,
        tried in that order."""
        return (
            self._parse_primary()
            or self._parse_function(anonymous=anonymous_func)
            or self._parse_id_var(any_token=any_token, tokens=tokens)
        )

    def _parse_function(
        self,
        functions: t.Optional[t.Dict[str, t.Callable]] = None,
        anonymous: bool = False,
        optional_parens: bool = True,
    ) -> t.Optional[exp.Expression]:
        """Parse a function call.

        Args:
            functions: name -> builder overrides; defaults to self.FUNCTIONS.
            anonymous: when True, always build an exp.Anonymous instead of a
                known function node.
            optional_parens: allow paren-less functions (e.g. CURRENT_DATE).
        """
        if not self._curr:
            return None

        token_type = self._curr.token_type

        if optional_parens and self._match_set(self.NO_PAREN_FUNCTION_PARSERS):
            return self.NO_PAREN_FUNCTION_PARSERS[token_type](self)

        if not self._next or self._next.token_type != TokenType.L_PAREN:
            if optional_parens and token_type in self.NO_PAREN_FUNCTIONS:
                self._advance()
                return self.expression(self.NO_PAREN_FUNCTIONS[token_type])

            return None

        if token_type not in self.FUNC_TOKENS:
            return None

        this = self._curr.text
        upper = this.upper()
        # Consume the function name and the opening paren together.
        self._advance(2)

        parser = self.FUNCTION_PARSERS.get(upper)

        if parser and not anonymous:
            this = parser(self)
        else:
            subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type)

            if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH):
                # e.g. EXISTS(SELECT ...) / ANY(SELECT ...).
                this = self.expression(subquery_predicate, this=self._parse_select())
                self._match_r_paren()
                return this

            if functions is None:
                functions = self.FUNCTIONS

            function = functions.get(upper)

            alias = upper in self.FUNCTIONS_WITH_ALIASED_ARGS
            args = self._parse_csv(lambda: self._parse_lambda(alias=alias))

            if function and not anonymous:
                # Known function: validate arity/args against its spec.
                this = self.validate_expression(function(args), args)
            else:
                this = self.expression(exp.Anonymous, this=this, expressions=args)

        self._match(TokenType.R_PAREN, expression=this)
        # A function call may be followed by an OVER (...) window.
        return self._parse_window(this)

    def _parse_function_parameter(self) -> t.Optional[exp.Expression]:
        """Parse one parameter of a user-defined function (name plus column def)."""
        return self._parse_column_def(self._parse_id_var())

    def _parse_user_defined_function(
        self, kind: t.Optional[TokenType] = None
    ) -> t.Optional[exp.Expression]:
        """Parse a (possibly dotted) UDF name and its optional parameter list."""
        this = self._parse_id_var()

        while self._match(TokenType.DOT):
            this = self.expression(exp.Dot, this=this, expression=self._parse_id_var())

        if not self._match(TokenType.L_PAREN):
            return this

        expressions = self._parse_csv(self._parse_function_parameter)
        self._match_r_paren()
        return self.expression(
            exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True
        )

    def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier:
        """Parse a MySQL-style introducer (_charset'literal'); fall back to a
        plain identifier when no literal follows."""
        literal = self._parse_primary()
        if literal:
            return self.expression(exp.Introducer, this=token.text, expression=literal)

        return self.expression(exp.Identifier, this=token.text)

    def _parse_session_parameter(self) -> exp.SessionParameter:
        """Parse a session parameter reference, optionally qualified (kind.name)."""
        kind = None
        this = self._parse_id_var() or self._parse_primary()

        if this and self._match(TokenType.DOT):
            kind = this.name
            this = self._parse_var() or self._parse_primary()

        return self.expression(exp.SessionParameter, this=this, kind=kind)

    def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]:
        """Parse a lambda expression (x -> ..., (a, b) -> ...); when no lambda
        arrow follows, rewind and parse a regular (possibly DISTINCT) expression."""
        index = self._index

        if self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(self._parse_id_var)

            if not self._match(TokenType.R_PAREN):
                self._retreat(index)
        else:
            expressions = [self._parse_id_var()]

        if self._match_set(self.LAMBDAS):
            return self.LAMBDAS[self._prev.token_type](self, expressions)

        # Not a lambda after all — rewind to before the tentative parse.
        self._retreat(index)

        this: t.Optional[exp.Expression]

        if self._match(TokenType.DISTINCT):
            this = self.expression(
                exp.Distinct, expressions=self._parse_csv(self._parse_conjunction)
            )
        else:
            this = self._parse_select_or_expression(alias=alias)

        return self._parse_limit(self._parse_order(self._parse_respect_or_ignore_nulls(this)))

    def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        """Parse a parenthesized schema (column/constraint list) attached to `this`.

        First probes for a nested SELECT (e.g. CREATE TABLE t AS (SELECT ...)) and
        backs off without consuming anything if found.
        """
        index = self._index

        if not self.errors:
            try:
                if self._parse_select(nested=True):
                    return this
            except ParseError:
                pass
            finally:
                # Probe only: drop any errors and restore the cursor.
                self.errors.clear()
                self._retreat(index)

        if not self._match(TokenType.L_PAREN):
            return this

        args = self._parse_csv(
            lambda: self._parse_constraint()
            or self._parse_column_def(self._parse_field(any_token=True))
        )

        self._match_r_paren()
        return self.expression(exp.Schema, this=this, expressions=args)

    def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse a column definition: type and trailing constraints for `this`."""
        # column defs are not really columns, they're identifiers
        if isinstance(this, exp.Column):
            this = this.this

        kind = self._parse_types(schema=True)

        if self._match_text_seq("FOR", "ORDINALITY"):
            return self.expression(exp.ColumnDef, this=this, ordinality=True)

        constraints = []
        while True:
            constraint = self._parse_column_constraint()
            if not constraint:
                break
            constraints.append(constraint)

        if not kind and not constraints:
            return this

        return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints)

    def _parse_auto_increment(
        self,
    ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint:
        """Parse AUTO_INCREMENT, optionally with (start, increment) or
        START ... INCREMENT ... arguments."""
        start = None
        increment = None

        if self._match(TokenType.L_PAREN, advance=False):
            args = self._parse_wrapped_csv(self._parse_bitwise)
            start = seq_get(args, 0)
            increment = seq_get(args, 1)
        elif self._match_text_seq("START"):
            start = self._parse_bitwise()
            self._match_text_seq("INCREMENT")
            increment = self._parse_bitwise()

        if start and increment:
            return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment)

        return exp.AutoIncrementColumnConstraint()

    def _parse_compress(self) -> exp.CompressColumnConstraint:
        """Parse a COMPRESS column constraint, with either a wrapped list or a
        single expression argument."""
        if self._match(TokenType.L_PAREN, advance=False):
            return self.expression(
                exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise)
            )

        return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise())

    def _parse_generated_as_identity(self) -> exp.GeneratedAsIdentityColumnConstraint:
        """Parse GENERATED { ALWAYS | BY DEFAULT } AS IDENTITY (options) or a
        generated-expression column (... AS (expr))."""
        if self._match_text_seq("BY", "DEFAULT"):
            on_null = self._match_pair(TokenType.ON, TokenType.NULL)
            this = self.expression(
                exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null
            )
        else:
            self._match_text_seq("ALWAYS")
            this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True)

        self._match(TokenType.ALIAS)
        identity = self._match_text_seq("IDENTITY")

        if self._match(TokenType.L_PAREN):
            if self._match_text_seq("START", "WITH"):
                this.set("start", self._parse_bitwise())
            if self._match_text_seq("INCREMENT", "BY"):
                this.set("increment", self._parse_bitwise())
            if self._match_text_seq("MINVALUE"):
                this.set("minvalue", self._parse_bitwise())
            if self._match_text_seq("MAXVALUE"):
                this.set("maxvalue", self._parse_bitwise())

            if self._match_text_seq("CYCLE"):
                this.set("cycle", True)
            elif self._match_text_seq("NO", "CYCLE"):
                this.set("cycle", False)

            if not identity:
                # GENERATED ALWAYS AS (expr): the parens hold an expression,
                # not identity options.
                this.set("expression", self._parse_bitwise())

            self._match_r_paren()

        return this

    def _parse_inline(self) -> exp.InlineLengthColumnConstraint:
        """Parse an INLINE [LENGTH] column constraint."""
        self._match_text_seq("LENGTH")
        return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise())

    def _parse_not_constraint(
        self,
    ) -> t.Optional[exp.NotNullColumnConstraint | exp.CaseSpecificColumnConstraint]:
        """Parse the constraint following NOT: NULL or CASESPECIFIC."""
        if self._match_text_seq("NULL"):
            return self.expression(exp.NotNullColumnConstraint)
        if self._match_text_seq("CASESPECIFIC"):
            return self.expression(exp.CaseSpecificColumnConstraint, not_=True)
        return None

    def _parse_column_constraint(self) -> t.Optional[exp.Expression]:
        """Parse one (optionally named) column constraint via CONSTRAINT_PARSERS."""
        if self._match(TokenType.CONSTRAINT):
            this = self._parse_id_var()
        else:
            this = None

        if self._match_texts(self.CONSTRAINT_PARSERS):
            return self.expression(
                exp.ColumnConstraint,
                this=this,
                kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self),
            )

        return this

    def _parse_constraint(self) -> t.Optional[exp.Expression]:
        """Parse a table-level constraint; unnamed ones fall through to
        _parse_unnamed_constraint."""
        if not self._match(TokenType.CONSTRAINT):
            return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS)

        this = self._parse_id_var()
        expressions = []

        while True:
            constraint = self._parse_unnamed_constraint() or self._parse_function()
            if not constraint:
                break
            expressions.append(constraint)

        return self.expression(exp.Constraint, this=this, expressions=expressions)

    def _parse_unnamed_constraint(
        self, constraints: t.Optional[t.Collection[str]] = None
    ) -> t.Optional[exp.Expression]:
        """Parse an unnamed constraint whose keyword is in `constraints`
        (defaults to CONSTRAINT_PARSERS)."""
        if not self._match_texts(constraints or self.CONSTRAINT_PARSERS):
            return None

        constraint = self._prev.text.upper()
        if constraint not in self.CONSTRAINT_PARSERS:
            self.raise_error(f"No parser found for schema constraint {constraint}.")

        return self.CONSTRAINT_PARSERS[constraint](self)

    def _parse_unique(self) -> exp.UniqueColumnConstraint:
        """Parse a UNIQUE [KEY] constraint with an optional column list."""
        self._match_text_seq("KEY")
        return self.expression(
            exp.UniqueColumnConstraint, this=self._parse_schema(self._parse_id_var(any_token=False))
        )

    def _parse_key_constraint_options(self) -> t.List[str]:
        """Collect trailing key-constraint options (ON ... actions, DEFERRABLE,
        MATCH FULL, ...) as plain strings."""
        options = []
        while True:
            if not self._curr:
                break

            if self._match(TokenType.ON):
                action = None
                # The token after ON is the event (e.g. DELETE / UPDATE).
                on = self._advance_any() and self._prev.text

                if self._match_text_seq("NO", "ACTION"):
                    action = "NO ACTION"
                elif self._match_text_seq("CASCADE"):
                    action = "CASCADE"
                elif self._match_pair(TokenType.SET, TokenType.NULL):
                    action = "SET NULL"
                elif self._match_pair(TokenType.SET, TokenType.DEFAULT):
                    action = "SET DEFAULT"
                else:
                    self.raise_error("Invalid key constraint")

                options.append(f"ON {on} {action}")
            elif self._match_text_seq("NOT", "ENFORCED"):
                options.append("NOT ENFORCED")
            elif self._match_text_seq("DEFERRABLE"):
                options.append("DEFERRABLE")
            elif self._match_text_seq("INITIALLY", "DEFERRED"):
                options.append("INITIALLY DEFERRED")
            elif self._match_text_seq("NORELY"):
                options.append("NORELY")
            elif self._match_text_seq("MATCH", "FULL"):
                options.append("MATCH FULL")
            else:
                break

        return options

    def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]:
        """Parse a REFERENCES clause; when `match` is True the REFERENCES keyword
        is required."""
        if match and not self._match(TokenType.REFERENCES):
            return None

        expressions = None
        this = self._parse_table(schema=True)
        options = self._parse_key_constraint_options()
        return self.expression(exp.Reference, this=this, expressions=expressions, options=options)

    def _parse_foreign_key(self) -> exp.ForeignKey:
        """Parse a FOREIGN KEY constraint: columns, REFERENCES target, and
        ON DELETE / ON UPDATE actions."""
        expressions = self._parse_wrapped_id_vars()
        reference = self._parse_references()
        options = {}

        while self._match(TokenType.ON):
            if not self._match_set((TokenType.DELETE, TokenType.UPDATE)):
                self.raise_error("Expected DELETE or UPDATE")

            kind = self._prev.text.lower()

            if self._match_text_seq("NO", "ACTION"):
                action = "NO ACTION"
            elif self._match(TokenType.SET):
                self._match_set((TokenType.NULL, TokenType.DEFAULT))
                action = "SET " + self._prev.text.upper()
            else:
                self._advance()
                action = self._prev.text.upper()

            options[kind] = action

        return self.expression(
            exp.ForeignKey, expressions=expressions, reference=reference, **options  # type: ignore
        )

    def _parse_primary_key(
        self, wrapped_optional: bool = False, in_props: bool = False
    ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey:
        """Parse PRIMARY KEY, either as a column constraint (no column list) or a
        table constraint with a wrapped column list and options."""
        desc = (
            self._match_set((TokenType.ASC, TokenType.DESC))
            and self._prev.token_type == TokenType.DESC
        )

        if not in_props and not self._match(TokenType.L_PAREN, advance=False):
            return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc)

        expressions = self._parse_wrapped_csv(self._parse_field, optional=wrapped_optional)
        options = self._parse_key_constraint_options()
        return self.expression(exp.PrimaryKey, expressions=expressions, options=options)

    def _parse_bracket(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse a bracketed suffix: subscript/slice ([...]), array literal, or a
        DuckDB struct literal ({...}); recurses for chained brackets."""
        if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)):
            return this

        bracket_kind = self._prev.token_type

        if self._match(TokenType.COLON):
            # Open-start slice, e.g. x[:n].
            expressions: t.List[t.Optional[exp.Expression]] = [
                self.expression(exp.Slice, expression=self._parse_conjunction())
            ]
        else:
            expressions = self._parse_csv(lambda: self._parse_slice(self._parse_conjunction()))

        # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs
        if bracket_kind == TokenType.L_BRACE:
            this = self.expression(exp.Struct, expressions=expressions)
        elif not this or this.name.upper() == "ARRAY":
            this = self.expression(exp.Array, expressions=expressions)
        else:
            # Normalize subscripts across dialects with different index bases.
            expressions = apply_index_offset(this, expressions, -self.INDEX_OFFSET)
            this = self.expression(exp.Bracket, this=this, expressions=expressions)

        if not self._match(TokenType.R_BRACKET) and bracket_kind == TokenType.L_BRACKET:
            self.raise_error("Expected ]")
        elif not self._match(TokenType.R_BRACE) and bracket_kind == TokenType.L_BRACE:
            self.raise_error("Expected }")

        self._add_comments(this)
        return self._parse_bracket(this)

    def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Extend `this` to a Slice if a colon follows (x[a:b])."""
        if self._match(TokenType.COLON):
            return self.expression(exp.Slice, this=this, expression=self._parse_conjunction())
        return this

    def _parse_case(self) -> t.Optional[exp.Expression]:
        """Parse a CASE expression: optional operand, WHEN/THEN branches, optional
        ELSE default, required END."""
        ifs = []
        default = None

        expression = self._parse_conjunction()

        while self._match(TokenType.WHEN):
            this = self._parse_conjunction()
            self._match(TokenType.THEN)
            then = self._parse_conjunction()
            ifs.append(self.expression(exp.If, this=this, true=then))

        if self._match(TokenType.ELSE):
            default = self._parse_conjunction()

        if not self._match(TokenType.END):
            self.raise_error("Expected END after CASE", self._prev)

        return self._parse_window(
            self.expression(exp.Case, this=expression, ifs=ifs, default=default)
        )

    def _parse_if(self) -> t.Optional[exp.Expression]:
        """Parse IF, in function form IF(cond, t, f) or statement form
        IF cond THEN t [ELSE f] END."""
        if self._match(TokenType.L_PAREN):
            args = self._parse_csv(self._parse_conjunction)
            this = self.validate_expression(exp.If.from_arg_list(args), args)
            self._match_r_paren()
        else:
            index = self._index - 1
            condition = self._parse_conjunction()

            if not condition:
                self._retreat(index)
                return None

            self._match(TokenType.THEN)
            true = self._parse_conjunction()
            false = self._parse_conjunction() if self._match(TokenType.ELSE) else None
            self._match(TokenType.END)
            this = self.expression(exp.If, this=condition, true=true, false=false)

        return self._parse_window(this)

    def _parse_extract(self) -> exp.Extract:
        """Parse EXTRACT(part FROM expr); also accepts a comma separator."""
        this = self._parse_function() or self._parse_var() or self._parse_type()

        if self._match(TokenType.FROM):
            return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

        if not self._match(TokenType.COMMA):
            self.raise_error("Expected FROM or comma after EXTRACT", self._prev)

        return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

    def _parse_any_value(self) -> exp.AnyValue:
        """Parse ANY_VALUE(expr [HAVING MAX|MIN expr])."""
        this = self._parse_lambda()
        is_max = None
        having = None

        if self._match(TokenType.HAVING):
            self._match_texts(("MAX", "MIN"))
            is_max = self._prev.text == "MAX"
            having = self._parse_column()

        return self.expression(exp.AnyValue, this=this, having=having, max=is_max)

    def _parse_cast(self, strict: bool) -> exp.Expression:
        """Parse the interior of CAST(expr AS type [FORMAT fmt]); `strict`
        selects Cast vs TryCast. A FORMAT on a temporal type is rewritten to
        StrToDate / StrToTime."""
        this = self._parse_conjunction()

        if not self._match(TokenType.ALIAS):
            if self._match(TokenType.COMMA):
                # Snowflake-style CAST(expr, 'type string').
                return self.expression(
                    exp.CastToStrType, this=this, expression=self._parse_string()
                )
            else:
                self.raise_error("Expected AS after CAST")

        fmt = None
        to = self._parse_types()

        if not to:
            self.raise_error("Expected TYPE after CAST")
        elif to.this == exp.DataType.Type.CHAR:
            if self._match(TokenType.CHARACTER_SET):
                to = self.expression(exp.CharacterSet, this=self._parse_var_or_string())
        elif self._match(TokenType.FORMAT):
            fmt_string = self._parse_string()
            fmt = self._parse_at_time_zone(fmt_string)

            if to.this in exp.DataType.TEMPORAL_TYPES:
                # Translate the format via the dialect's FORMAT/TIME mappings.
                this = self.expression(
                    exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime,
                    this=this,
                    format=exp.Literal.string(
                        format_time(
                            fmt_string.this if fmt_string else "",
                            self.FORMAT_MAPPING or self.TIME_MAPPING,
                            self.FORMAT_TRIE or self.TIME_TRIE,
                        )
                    ),
                )

                if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime):
                    this.set("zone", fmt.args["zone"])

                return this

        return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, format=fmt)

    def _parse_concat(self) -> t.Optional[exp.Expression]:
        """Parse CONCAT arguments, coalescing NULLs to '' when the dialect's
        CONCAT treats NULL as an empty string."""
        args = self._parse_csv(self._parse_conjunction)
        if self.CONCAT_NULL_OUTPUTS_STRING:
            args = [
                exp.func("COALESCE", exp.cast(arg, "text"), exp.Literal.string(""))
                for arg in args
                if arg
            ]

        # Some dialects (e.g. Trino) don't allow a single-argument CONCAT call, so when
        # we find such a call we replace it with its argument.
        if len(args) == 1:
            return args[0]

        return self.expression(
            exp.Concat if self.STRICT_STRING_CONCAT else exp.SafeConcat, expressions=args
        )

    def _parse_string_agg(self) -> exp.Expression:
        """Parse STRING_AGG / GROUP_CONCAT, handling DISTINCT, trailing
        ORDER BY / LIMIT, and the WITHIN GROUP form."""
        if self._match(TokenType.DISTINCT):
            args: t.List[t.Optional[exp.Expression]] = [
                self.expression(exp.Distinct, expressions=[self._parse_conjunction()])
            ]
            if self._match(TokenType.COMMA):
                args.extend(self._parse_csv(self._parse_conjunction))
        else:
            args = self._parse_csv(self._parse_conjunction)

        index = self._index
        if not self._match(TokenType.R_PAREN) and args:
            # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]])
            # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n])
            args[-1] = self._parse_limit(this=self._parse_order(this=args[-1]))
            return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1))

        # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]).
        # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that
        # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them.
        if not self._match_text_seq("WITHIN", "GROUP"):
            self._retreat(index)
            return self.validate_expression(exp.GroupConcat.from_arg_list(args), args)

        self._match_l_paren()  # The corresponding match_r_paren will be called in parse_function (caller)
        order = self._parse_order(this=seq_get(args, 0))
        return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1))

    def _parse_convert(self, strict: bool) -> t.Optional[exp.Expression]:
        """Parse CONVERT(expr USING charset) or CONVERT(expr, type) into a cast."""
        this = self._parse_bitwise()

        if self._match(TokenType.USING):
            to: t.Optional[exp.Expression] = self.expression(
                exp.CharacterSet, this=self._parse_var()
            )
        elif self._match(TokenType.COMMA):
            to = self._parse_types()
        else:
            to = None

        return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to)

    def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]:
        """
        There are generally two variants of the DECODE function:

        - DECODE(bin, charset)
        - DECODE(expression, search, result [, search, result] ... [, default])

        The second variant will always be parsed into a CASE expression. Note that NULL
        needs special treatment, since we need to explicitly check for it with `IS NULL`,
        instead of relying on pattern matching.
        """
        args = self._parse_csv(self._parse_conjunction)

        if len(args) < 3:
            return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1))

        expression, *expressions = args
        if not expression:
            return None

        ifs = []
        for search, result in zip(expressions[::2], expressions[1::2]):
            if not search or not result:
                return None

            if isinstance(search, exp.Literal):
                ifs.append(
                    exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result)
                )
            elif isinstance(search, exp.Null):
                ifs.append(
                    exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result)
                )
            else:
                # Unknown search value: match on equality OR both sides being NULL.
                cond = exp.or_(
                    exp.EQ(this=expression.copy(), expression=search),
                    exp.and_(
                        exp.Is(this=expression.copy(), expression=exp.Null()),
                        exp.Is(this=search.copy(), expression=exp.Null()),
                        copy=False,
                    ),
                    copy=False,
                )
                ifs.append(exp.If(this=cond, true=result))

        return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None)

    def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]:
        """Parse a JSON_OBJECT entry: [KEY] key [:|VALUE] value."""
        self._match_text_seq("KEY")
        key = self._parse_field()
        self._match(TokenType.COLON)
        self._match_text_seq("VALUE")
        value = self._parse_field()

        if not key and not value:
            return None
        return self.expression(exp.JSONKeyValue, this=key, expression=value)

    def _parse_json_object(self) -> exp.JSONObject:
        """Parse JSON_OBJECT(...) with its NULL handling, UNIQUE KEYS,
        RETURNING, FORMAT JSON, and ENCODING options."""
        star = self._parse_star()
        expressions = [star] if star else self._parse_csv(self._parse_json_key_value)

        null_handling = None
        if self._match_text_seq("NULL", "ON", "NULL"):
            null_handling = "NULL ON NULL"
        elif self._match_text_seq("ABSENT", "ON", "NULL"):
            null_handling = "ABSENT ON NULL"

        unique_keys = None
        if self._match_text_seq("WITH", "UNIQUE"):
            unique_keys = True
        elif self._match_text_seq("WITHOUT", "UNIQUE"):
            unique_keys = False

        self._match_text_seq("KEYS")

        return_type = self._match_text_seq("RETURNING") and self._parse_type()
        format_json = self._match_text_seq("FORMAT", "JSON")
        encoding = self._match_text_seq("ENCODING") and self._parse_var()

        return self.expression(
            exp.JSONObject,
            expressions=expressions,
            null_handling=null_handling,
            unique_keys=unique_keys,
            return_type=return_type,
            format_json=format_json,
            encoding=encoding,
        )

    def _parse_logarithm(self) -> exp.Func:
        """Parse LOG arguments, honoring the dialect's base-argument order and
        LOG-defaults-to-LN behavior."""
        # Default argument order is base, expression
        args = self._parse_csv(self._parse_range)

        if len(args) > 1:
            if not self.LOG_BASE_FIRST:
                args.reverse()
            return exp.Log.from_arg_list(args)

        return self.expression(
            exp.Ln if self.LOG_DEFAULTS_TO_LN else exp.Log, this=seq_get(args, 0)
        )

    def _parse_match_against(self) -> exp.MatchAgainst:
        """Parse MySQL MATCH(cols) AGAINST (expr [modifier])."""
        expressions = self._parse_csv(self._parse_column)

        self._match_text_seq(")", "AGAINST", "(")

        this = self._parse_string()

        if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"):
            modifier = "IN NATURAL LANGUAGE MODE"
            if self._match_text_seq("WITH", "QUERY", "EXPANSION"):
                modifier = f"{modifier} WITH QUERY EXPANSION"
        elif self._match_text_seq("IN", "BOOLEAN", "MODE"):
            modifier = "IN BOOLEAN MODE"
        elif self._match_text_seq("WITH", "QUERY", "EXPANSION"):
            modifier = "WITH QUERY EXPANSION"
        else:
            modifier = None

        return self.expression(
            exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier
        )

    # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16
    def _parse_open_json(self) -> exp.OpenJSON:
        """Parse T-SQL OPENJSON(expr [, path]) [WITH (column defs)]."""
        this = self._parse_bitwise()
        path = self._match(TokenType.COMMA) and self._parse_string()

        def _parse_open_json_column_def() -> exp.OpenJSONColumnDef:
            # One WITH-clause column: name, type, optional path, optional AS JSON.
            this = self._parse_field(any_token=True)
            kind = self._parse_types()
            path = self._parse_string()
            as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON)

            return self.expression(
                exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json
            )

        expressions = None
        if self._match_pair(TokenType.R_PAREN, TokenType.WITH):
            self._match_l_paren()
            expressions = self._parse_csv(_parse_open_json_column_def)

        return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions)

    def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition:
        """Parse POSITION/LOCATE, handling both the `needle IN haystack` form and
        the comma-argument form (with dialect-dependent argument order)."""
        args = self._parse_csv(self._parse_bitwise)

        if self._match(TokenType.IN):
            return self.expression(
                exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0)
            )

        if haystack_first:
            haystack = seq_get(args, 0)
            needle = seq_get(args, 1)
        else:
            needle = seq_get(args, 0)
            haystack = seq_get(args, 1)

        return self.expression(
            exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2)
        )

    def _parse_join_hint(self, func_name: str) -> exp.JoinHint:
        """Parse a join hint's table arguments into a JoinHint node."""
        args = self._parse_csv(self._parse_table)
        return exp.JoinHint(this=func_name.upper(), expressions=args)

    def _parse_substring(self) -> exp.Substring:
        """Parse SUBSTRING, including the Postgres FROM/FOR keyword form."""
        # Postgres supports the form: substring(string [from int] [for int])
        # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6

        args = self._parse_csv(self._parse_bitwise)

        if self._match(TokenType.FROM):
            args.append(self._parse_bitwise())
            if self._match(TokenType.FOR):
                args.append(self._parse_bitwise())

        return self.validate_expression(exp.Substring.from_arg_list(args), args)

    def _parse_trim(self) -> exp.Trim:
        """Parse TRIM([LEADING|TRAILING|BOTH] [chars FROM] expr [COLLATE ...])."""
        # https://www.w3resource.com/sql/character-functions/trim.php
        # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html

        position = None
        collation = None

        if self._match_texts(self.TRIM_TYPES):
            position = self._prev.text.upper()

        expression = self._parse_bitwise()
        if self._match_set((TokenType.FROM, TokenType.COMMA)):
            # First expression was the trim characters; the real target follows.
            this = self._parse_bitwise()
        else:
            this = expression
            expression = None

        if self._match(TokenType.COLLATE):
            collation = self._parse_bitwise()

        return self.expression(
            exp.Trim, this=this, position=position, expression=expression, collation=collation
        )

    def _parse_window_clause(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]:
        """Parse a WINDOW clause's list of named windows, if present."""
        return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window)

    def _parse_named_window(self) -> t.Optional[exp.Expression]:
        """Parse one `name AS (window spec)` entry of a WINDOW clause."""
        return self._parse_window(self._parse_id_var(), alias=True)

    def _parse_respect_or_ignore_nulls(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        """Wrap `this` in IgnoreNulls / RespectNulls when the modifier follows."""
        if self._match_text_seq("IGNORE", "NULLS"):
            return self.expression(exp.IgnoreNulls, this=this)
        if self._match_text_seq("RESPECT", "NULLS"):
            return self.expression(exp.RespectNulls, this=this)
        return this

    def _parse_window(
        self, this: t.Optional[exp.Expression], alias: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse window-function suffixes for `this`: FILTER, WITHIN GROUP,
        IGNORE/RESPECT NULLS, and OVER (...) with partition/order/frame spec.

        Args:
            alias: when True, parse a named-window definition (WINDOW x AS (...)).
        """
        if self._match_pair(TokenType.FILTER, TokenType.L_PAREN):
            self._match(TokenType.WHERE)
            this = self.expression(
                exp.Filter, this=this, expression=self._parse_where(skip_where_token=True)
            )
            self._match_r_paren()

        # T-SQL allows the OVER (...) syntax after WITHIN GROUP.
        # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16
        if self._match_text_seq("WITHIN", "GROUP"):
            order = self._parse_wrapped(self._parse_order)
            this = self.expression(exp.WithinGroup, this=this, expression=order)

        # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER
        # Some dialects choose to implement and some do not.
        # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html

        # There is some code above in _parse_lambda that handles
        #   SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ...

        # The below changes handle
        #   SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ...

        # Oracle allows both formats
        #   (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html)
        # and Snowflake chose to do the same for familiarity
        # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes
        this = self._parse_respect_or_ignore_nulls(this)

        # bigquery select from window x AS (partition by ...)
        if alias:
            over = None
            self._match(TokenType.ALIAS)
        elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS):
            return this
        else:
            over = self._prev.text.upper()

        if not self._match(TokenType.L_PAREN):
            # OVER window_name (a reference to a named window).
            return self.expression(
                exp.Window, this=this, alias=self._parse_id_var(False), over=over
            )

        window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS)

        first = self._match(TokenType.FIRST)
        if self._match_text_seq("LAST"):
            first = False

        partition = self._parse_partition_by()
        order = self._parse_order()
        kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text

        if kind:
            self._match(TokenType.BETWEEN)
            start = self._parse_window_spec()
            self._match(TokenType.AND)
            end = self._parse_window_spec()

            spec = self.expression(
                exp.WindowSpec,
                kind=kind,
                start=start["value"],
                start_side=start["side"],
                end=end["value"],
                end_side=end["side"],
            )
        else:
            spec = None

        self._match_r_paren()

        window = self.expression(
            exp.Window,
            this=this,
            partition_by=partition,
            order=order,
            spec=spec,
            alias=window_alias,
            over=over,
            first=first,
        )

        # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...)
        if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False):
            return self._parse_window(window, alias=alias)

        return window

    def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]:
        """Parse one frame boundary: UNBOUNDED / CURRENT ROW / expr, plus the
        optional PRECEDING/FOLLOWING side."""
        self._match(TokenType.BETWEEN)

        return {
            "value": (
                (self._match_text_seq("UNBOUNDED") and "UNBOUNDED")
                or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW")
                or self._parse_bitwise()
            ),
            "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text,
        }

    def _parse_alias(
        self, this: t.Optional[exp.Expression], explicit: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse an alias for `this`; `explicit` requires the AS keyword.
        A parenthesized list becomes an Aliases node."""
        any_token = self._match(TokenType.ALIAS)

        if explicit and not any_token:
            return this

        if self._match(TokenType.L_PAREN):
            aliases = self.expression(
                exp.Aliases,
                this=this,
                expressions=self._parse_csv(lambda: self._parse_id_var(any_token)),
            )
            self._match_r_paren(aliases)
            return aliases

        alias = self._parse_id_var(any_token)

        if alias:
            return self.expression(exp.Alias, this=this, alias=alias)

        return this

    def _parse_id_var(
        self,
        any_token: bool = True,
        tokens: t.Optional[t.Collection[TokenType]] = None,
    ) -> t.Optional[exp.Expression]:
        """Parse an identifier or identifier-like token (keywords usable as
        names per ID_VAR_TOKENS, or any token when `any_token` is True)."""
        identifier = self._parse_identifier()

        if identifier:
            return identifier

        if (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS):
            quoted = self._prev.token_type == TokenType.STRING
            return exp.Identifier(this=self._prev.text, quoted=quoted)

        return None

    def _parse_string(self) -> t.Optional[exp.Expression]:
        """Parse a string literal, falling back to a placeholder."""
        if self._match(TokenType.STRING):
            return self.PRIMARY_PARSERS[TokenType.STRING](self, self._prev)
        return self._parse_placeholder()

    def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]:
        return
exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True) 4269 4270 def _parse_number(self) -> t.Optional[exp.Expression]: 4271 if self._match(TokenType.NUMBER): 4272 return self.PRIMARY_PARSERS[TokenType.NUMBER](self, self._prev) 4273 return self._parse_placeholder() 4274 4275 def _parse_identifier(self) -> t.Optional[exp.Expression]: 4276 if self._match(TokenType.IDENTIFIER): 4277 return self.expression(exp.Identifier, this=self._prev.text, quoted=True) 4278 return self._parse_placeholder() 4279 4280 def _parse_var( 4281 self, any_token: bool = False, tokens: t.Optional[t.Collection[TokenType]] = None 4282 ) -> t.Optional[exp.Expression]: 4283 if ( 4284 (any_token and self._advance_any()) 4285 or self._match(TokenType.VAR) 4286 or (self._match_set(tokens) if tokens else False) 4287 ): 4288 return self.expression(exp.Var, this=self._prev.text) 4289 return self._parse_placeholder() 4290 4291 def _advance_any(self) -> t.Optional[Token]: 4292 if self._curr and self._curr.token_type not in self.RESERVED_KEYWORDS: 4293 self._advance() 4294 return self._prev 4295 return None 4296 4297 def _parse_var_or_string(self) -> t.Optional[exp.Expression]: 4298 return self._parse_var() or self._parse_string() 4299 4300 def _parse_null(self) -> t.Optional[exp.Expression]: 4301 if self._match(TokenType.NULL): 4302 return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev) 4303 return None 4304 4305 def _parse_boolean(self) -> t.Optional[exp.Expression]: 4306 if self._match(TokenType.TRUE): 4307 return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev) 4308 if self._match(TokenType.FALSE): 4309 return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev) 4310 return None 4311 4312 def _parse_star(self) -> t.Optional[exp.Expression]: 4313 if self._match(TokenType.STAR): 4314 return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev) 4315 return None 4316 4317 def _parse_parameter(self) -> exp.Parameter: 4318 wrapped = self._match(TokenType.L_BRACE) 
        this = self._parse_var() or self._parse_identifier() or self._parse_primary()
        self._match(TokenType.R_BRACE)
        return self.expression(exp.Parameter, this=this, wrapped=wrapped)

    def _parse_placeholder(self) -> t.Optional[exp.Expression]:
        """Parse a placeholder token via PLACEHOLDER_PARSERS; backtrack if parsing yields nothing."""
        if self._match_set(self.PLACEHOLDER_PARSERS):
            placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self)
            if placeholder:
                return placeholder
            # The placeholder parser consumed the token but produced nothing: undo.
            self._advance(-1)
        return None

    def _parse_except(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]:
        """Parse an `EXCEPT (col, ...)` / `EXCEPT col, ...` column list, or None."""
        if not self._match(TokenType.EXCEPT):
            return None
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_column)
        return self._parse_csv(self._parse_column)

    def _parse_replace(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]:
        """Parse a `REPLACE (expr, ...)` / `REPLACE expr, ...` list, or None."""
        if not self._match(TokenType.REPLACE):
            return None
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_expression)
        return self._parse_expressions()

    def _parse_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA
    ) -> t.List[t.Optional[exp.Expression]]:
        """Parse a `sep`-separated list of items using `parse_method`; None results are dropped."""
        parse_result = parse_method()
        items = [parse_result] if parse_result is not None else []

        while self._match(sep):
            # Comments trailing the separator are attached to the preceding item.
            self._add_comments(parse_result)
            parse_result = parse_method()
            if parse_result is not None:
                items.append(parse_result)

        return items

    def _parse_tokens(
        self, parse_method: t.Callable, expressions: t.Dict
    ) -> t.Optional[exp.Expression]:
        """Left-fold a sequence of operands into binary expressions keyed by operator token."""
        this = parse_method()

        while self._match_set(expressions):
            this = self.expression(
                expressions[self._prev.token_type],
                this=this,
                comments=self._prev_comments,
                expression=parse_method(),
            )

        return this

    def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[t.Optional[exp.Expression]]:
        """Parse a parenthesized, comma-separated list of identifiers."""
        return self._parse_wrapped_csv(self._parse_id_var, optional=optional)

    def _parse_wrapped_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False
    ) -> t.List[t.Optional[exp.Expression]]:
        """Parse a parenthesized `sep`-separated list using `parse_method`."""
        return self._parse_wrapped(
            lambda: self._parse_csv(parse_method, sep=sep), optional=optional
        )

    def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any:
        """Run `parse_method` inside parentheses; parentheses are required unless `optional`."""
        wrapped = self._match(TokenType.L_PAREN)
        if not wrapped and not optional:
            self.raise_error("Expecting (")
        parse_result = parse_method()
        if wrapped:
            self._match_r_paren()
        return parse_result

    def _parse_expressions(self) -> t.List[t.Optional[exp.Expression]]:
        """Parse a comma-separated list of expressions."""
        return self._parse_csv(self._parse_expression)

    def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]:
        """Parse a SELECT statement, or fall back to a (possibly aliased) expression."""
        return self._parse_select() or self._parse_set_operations(
            self._parse_expression() if alias else self._parse_conjunction()
        )

    def _parse_ddl_select(self) -> t.Optional[exp.Expression]:
        """Parse the SELECT portion of a DDL statement (e.g. CREATE TABLE ... AS SELECT)."""
        return self._parse_query_modifiers(
            self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False))
        )

    def _parse_transaction(self) -> exp.Transaction | exp.Command:
        """Parse a BEGIN/START TRANSACTION statement with optional transaction modes."""
        this = None
        if self._match_texts(self.TRANSACTION_KIND):
            this = self._prev.text

        self._match_texts({"TRANSACTION", "WORK"})

        modes = []
        while True:
            mode = []
            # Each mode may be several VAR tokens, e.g. "READ ONLY".
            while self._match(TokenType.VAR):
                mode.append(self._prev.text)

            if mode:
                modes.append(" ".join(mode))
            if not self._match(TokenType.COMMA):
                break

        return self.expression(exp.Transaction, this=this, modes=modes)

    def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback:
        """Parse COMMIT / ROLLBACK, with optional savepoint and AND [NO] CHAIN."""
        chain = None
        savepoint = None
        # The COMMIT/ROLLBACK keyword itself was consumed by the caller.
        is_rollback = self._prev.token_type == TokenType.ROLLBACK

        self._match_texts({"TRANSACTION", "WORK"})

        if self._match_text_seq("TO"):
            self._match_text_seq("SAVEPOINT")
            savepoint = self._parse_id_var()

        if self._match(TokenType.AND):
            chain = not self._match_text_seq("NO")
            self._match_text_seq("CHAIN")

        if is_rollback:
            return self.expression(exp.Rollback, savepoint=savepoint)

        return self.expression(exp.Commit, chain=chain)

    def _parse_add_column(self) -> t.Optional[exp.Expression]:
        """Parse an ALTER TABLE `ADD [COLUMN] [IF NOT EXISTS] <def> [FIRST|AFTER col]` action."""
        if not self._match_text_seq("ADD"):
            return None

        self._match(TokenType.COLUMN)
        exists_column = self._parse_exists(not_=True)
        expression = self._parse_column_def(self._parse_field(any_token=True))

        if expression:
            expression.set("exists", exists_column)

            # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns
            if self._match_texts(("FIRST", "AFTER")):
                position = self._prev.text
                column_position = self.expression(
                    exp.ColumnPosition, this=self._parse_column(), position=position
                )
                expression.set("position", column_position)

        return expression

    def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]:
        """Parse an ALTER TABLE DROP [COLUMN] action, defaulting the kind to COLUMN."""
        drop = self._match(TokenType.DROP) and self._parse_drop()
        if drop and not isinstance(drop, exp.Command):
            drop.set("kind", drop.args.get("kind", "COLUMN"))
        return drop

    # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html
    def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition:
        """Parse an ALTER TABLE DROP PARTITION action (list of partition specs)."""
        return self.expression(
            exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists
        )

    def _parse_add_constraint(self) -> exp.AddConstraint:
        """Parse an ADD CONSTRAINT / FOREIGN KEY / PRIMARY KEY action.

        The constraint-kind token was already consumed by the caller (`self._prev`).
        """
        this = None
        kind = self._prev.token_type

        if kind == TokenType.CONSTRAINT:
            this = self._parse_id_var()

            if self._match_text_seq("CHECK"):
                expression = self._parse_wrapped(self._parse_conjunction)
                enforced = self._match_text_seq("ENFORCED")

                return self.expression(
                    exp.AddConstraint, this=this, expression=expression, enforced=enforced
                )

        if kind == TokenType.FOREIGN_KEY or self._match(TokenType.FOREIGN_KEY):
            expression = self._parse_foreign_key()
        elif kind == TokenType.PRIMARY_KEY or self._match(TokenType.PRIMARY_KEY):
            expression = self._parse_primary_key()
        else:
            expression = None

        return self.expression(exp.AddConstraint, this=this, expression=expression)

    def _parse_alter_table_add(self) -> t.List[t.Optional[exp.Expression]]:
        """Parse the ADD action of ALTER TABLE: either constraints or columns."""
        index = self._index - 1

        if self._match_set(self.ADD_CONSTRAINT_TOKENS):
            return self._parse_csv(self._parse_add_constraint)

        # Not a constraint: rewind and re-parse as column additions.
        self._retreat(index)
        return self._parse_csv(self._parse_add_column)

    def _parse_alter_table_alter(self) -> exp.AlterColumn:
        """Parse an ALTER [COLUMN] action: DROP/SET DEFAULT or SET DATA TYPE."""
        self._match(TokenType.COLUMN)
        column = self._parse_field(any_token=True)

        if self._match_pair(TokenType.DROP, TokenType.DEFAULT):
            return self.expression(exp.AlterColumn, this=column, drop=True)
        if self._match_pair(TokenType.SET, TokenType.DEFAULT):
            return self.expression(exp.AlterColumn, this=column, default=self._parse_conjunction())

        self._match_text_seq("SET", "DATA")
        return self.expression(
            exp.AlterColumn,
            this=column,
            dtype=self._match_text_seq("TYPE") and self._parse_types(),
            collate=self._match(TokenType.COLLATE) and self._parse_term(),
            using=self._match(TokenType.USING) and self._parse_conjunction(),
        )

    def _parse_alter_table_drop(self) -> t.List[t.Optional[exp.Expression]]:
        """Parse the DROP action of ALTER TABLE: partitions or columns."""
        index = self._index - 1

        partition_exists = self._parse_exists()
        if self._match(TokenType.PARTITION, advance=False):
            return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists))

        # Not a partition drop: rewind and re-parse as column drops.
        self._retreat(index)
        return self._parse_csv(self._parse_drop_column)

    def _parse_alter_table_rename(self) -> exp.RenameTable:
        """Parse a `RENAME TO <table>` action of ALTER TABLE."""
        self._match_text_seq("TO")
        return self.expression(exp.RenameTable, this=self._parse_table(schema=True))

    def _parse_alter(self) -> exp.AlterTable | exp.Command:
        """Parse an ALTER TABLE statement, falling back to a raw Command when unsupported."""
        start = self._prev

        if not self._match(TokenType.TABLE):
            return self._parse_as_command(start)

        exists = self._parse_exists()
        this = self._parse_table(schema=True)

        if self._next:
            self._advance()

        parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None
        if parser:
            actions = ensure_list(parser(self))

            # Only build an AlterTable if every token was consumed; otherwise
            # fall through and treat the whole statement as an opaque Command.
            if not self._curr:
                return self.expression(
                    exp.AlterTable,
                    this=this,
                    exists=exists,
                    actions=actions,
                )
        return self._parse_as_command(start)

    def _parse_merge(self) -> exp.Merge:
        """Parse a MERGE INTO ... USING ... ON ... statement with its WHEN clauses."""
        self._match(TokenType.INTO)
        target = self._parse_table()

        self._match(TokenType.USING)
        using = self._parse_table()

        self._match(TokenType.ON)
        on = self._parse_conjunction()

        whens = []
        while self._match(TokenType.WHEN):
            matched = not self._match(TokenType.NOT)
            self._match_text_seq("MATCHED")
            # source is True for BY SOURCE, False for BY TARGET or when neither appears.
            source = (
                False
                if self._match_text_seq("BY", "TARGET")
                else self._match_text_seq("BY", "SOURCE")
            )
            condition = self._parse_conjunction() if self._match(TokenType.AND) else None

            self._match(TokenType.THEN)

            if self._match(TokenType.INSERT):
                _this = self._parse_star()
                if _this:
                    # INSERT * shorthand.
                    then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=_this)
                else:
                    then = self.expression(
                        exp.Insert,
                        this=self._parse_value(),
                        expression=self._match(TokenType.VALUES) and self._parse_value(),
                    )
            elif self._match(TokenType.UPDATE):
                expressions = self._parse_star()
                if expressions:
                    # UPDATE * shorthand.
                    then = self.expression(exp.Update, expressions=expressions)
                else:
                    then = self.expression(
                        exp.Update,
                        expressions=self._match(TokenType.SET)
                        and self._parse_csv(self._parse_equality),
                    )
            elif self._match(TokenType.DELETE):
                then = self.expression(exp.Var, this=self._prev.text)
            else:
                then = None

            whens.append(
                self.expression(
                    exp.When,
                    matched=matched,
                    source=source,
                    condition=condition,
                    then=then,
                )
            )

        return self.expression(
            exp.Merge,
            this=target,
            using=using,
            on=on,
            expressions=whens,
        )

    def _parse_show(self) -> t.Optional[exp.Expression]:
        """Parse a SHOW statement, dispatching to dialect-specific SHOW parsers."""
        parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE)
        if parser:
            return parser(self)
        self._advance()
        return self.expression(exp.Show, this=self._prev.text.upper())

    def _parse_set_item_assignment(
        self, kind: t.Optional[str] = None
    ) -> t.Optional[exp.Expression]:
        """Parse a `name = value` / `name TO value` SET item, or None with backtracking."""
        index = self._index

        if kind in {"GLOBAL", "SESSION"} and self._match_text_seq("TRANSACTION"):
            return self._parse_set_transaction(global_=kind == "GLOBAL")

        left = self._parse_primary() or self._parse_id_var()

        if not self._match_texts(("=", "TO")):
            self._retreat(index)
            return None

        right = self._parse_statement() or self._parse_id_var()
        this = self.expression(exp.EQ, this=left, expression=right)

        return self.expression(exp.SetItem, this=this, kind=kind)

    def _parse_set_transaction(self, global_: bool = False) -> exp.Expression:
        """Parse `SET [GLOBAL|SESSION] TRANSACTION <characteristics>`."""
        self._match_text_seq("TRANSACTION")
        characteristics = self._parse_csv(
            lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS)
        )
        return self.expression(
            exp.SetItem,
            expressions=characteristics,
            kind="TRANSACTION",
            **{"global": global_},  # type: ignore
        )

    def _parse_set_item(self) -> t.Optional[exp.Expression]:
        """Parse one SET item via the SET trie, defaulting to a plain assignment."""
        parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE)
        return parser(self) if parser else self._parse_set_item_assignment(kind=None)

    def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command:
        """Parse a SET statement; fall back to a raw Command if tokens remain unconsumed."""
        index = self._index
        set_ = self.expression(
            exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag
        )

        if self._curr:
            self._retreat(index)
            return self._parse_as_command(self._prev)

        return set_

    def _parse_var_from_options(self, options: t.Collection[str]) -> t.Optional[exp.Var]:
        """Match one of the given (possibly multi-word) options and return it as a Var."""
        for option in options:
            if self._match_text_seq(*option.split(" ")):
                return exp.var(option)
        return None

    def _parse_as_command(self, start: Token) -> exp.Command:
        """Consume the remaining tokens and wrap everything from `start` in a raw Command."""
        while self._curr:
            self._advance()
        text = self._find_sql(start, self._prev)
        # Split the leading keyword from the rest of the raw SQL text.
        size = len(start.text)
        return exp.Command(this=text[:size], expression=text[size:])

    def _parse_dict_property(self, this: str) -> exp.DictProperty:
        """Parse a dictionary property `this (KIND(key value, ...))`, e.g. ClickHouse SOURCE/LIFETIME."""
        settings = []

        self._match_l_paren()
        kind = self._parse_id_var()

        if self._match(TokenType.L_PAREN):
            while True:
                key = self._parse_id_var()
                value = self._parse_primary()

                if not key and value is None:
                    break
                settings.append(self.expression(exp.DictSubProperty, this=key, value=value))
            self._match(TokenType.R_PAREN)

        self._match_r_paren()

        return self.expression(
            exp.DictProperty,
            this=this,
            kind=kind.this if kind else None,
            settings=settings,
        )

    def _parse_dict_range(self, this: str) -> exp.DictRange:
        """Parse a dictionary range `this (MIN x MAX y)`; MIN defaults to 0 when omitted."""
        self._match_l_paren()
        has_min = self._match_text_seq("MIN")
        if has_min:
            min = self._parse_var() or self._parse_primary()
            self._match_text_seq("MAX")
            max = self._parse_var() or self._parse_primary()
        else:
            max = self._parse_var() or self._parse_primary()
            min = exp.Literal.number(0)
        self._match_r_paren()
        return self.expression(exp.DictRange, this=this, min=min, max=max)

    def _find_parser(
        self, parsers: t.Dict[str, t.Callable], trie: t.Dict
    ) -> t.Optional[t.Callable]:
        """Walk `trie` over upcoming tokens to find a multi-word parser key in `parsers`.

        Backtracks to the starting index and returns None when no key matches.
        """
        if not self._curr:
            return None

        index = self._index
        this = []
        while True:
            # The current token might be multiple words
            curr = self._curr.text.upper()
            key = curr.split(" ")
            this.append(curr)

            self._advance()
            result, trie = in_trie(trie, key)
            if result == TrieResult.FAILED:
                break

            if result == TrieResult.EXISTS:
                subparser = parsers[" ".join(this)]
                return subparser

        self._retreat(index)
        return None

    def _match(self, token_type, advance=True, expression=None):
        """Return True and (optionally) advance if the current token has `token_type`."""
        if not self._curr:
            return None

        if self._curr.token_type == token_type:
            if advance:
                self._advance()
            self._add_comments(expression)
            return True

        return None

    def _match_set(self, types, advance=True):
        """Return True and (optionally) advance if the current token type is in `types`."""
        if not self._curr:
            return None

        if self._curr.token_type in types:
            if advance:
                self._advance()
            return True

        return None

    def _match_pair(self, token_type_a, token_type_b, advance=True):
        """Return True and (optionally) advance past two consecutive matching tokens."""
        if not self._curr or not self._next:
            return None

        if self._curr.token_type == token_type_a and self._next.token_type == token_type_b:
            if advance:
                self._advance(2)
            return True

        return None

    def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None:
        """Require and consume a `(`; raise a parse error otherwise."""
        if not self._match(TokenType.L_PAREN, expression=expression):
            self.raise_error("Expecting (")

    def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None:
        """Require and consume a `)`; raise a parse error otherwise."""
        if not self._match(TokenType.R_PAREN, expression=expression):
            self.raise_error("Expecting )")

    def _match_texts(self, texts, advance=True):
        """Return True and (optionally) advance if the current token's upper-cased text is in `texts`."""
        if self._curr and self._curr.text.upper() in texts:
            if advance:
                self._advance()
            return True
        return False

    def _match_text_seq(self, *texts, advance=True):
        """Match a sequence of token texts; backtrack fully and return False on a partial match."""
        index = self._index
        for text in texts:
            if self._curr and self._curr.text.upper() == text:
                self._advance()
            else:
                self._retreat(index)
                return False

        if not advance:
            self._retreat(index)

        return True

    @t.overload
    def _replace_columns_with_dots(self, this: exp.Expression) -> exp.Expression:
        ...

    @t.overload
    def _replace_columns_with_dots(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        ...

    def _replace_columns_with_dots(self, this):
        """Recursively rewrite Column nodes as Dot chains (table.column -> Dot(table, column))."""
        if isinstance(this, exp.Dot):
            exp.replace_children(this, self._replace_columns_with_dots)
        elif isinstance(this, exp.Column):
            exp.replace_children(this, self._replace_columns_with_dots)
            table = this.args.get("table")
            this = (
                self.expression(exp.Dot, this=table, expression=this.this) if table else this.this
            )

        return this

    def _replace_lambda(
        self, node: t.Optional[exp.Expression], lambda_variables: t.Set[str]
    ) -> t.Optional[exp.Expression]:
        """Replace Column references to lambda parameters inside `node` with plain identifiers."""
        if not node:
            return node

        for column in node.find_all(exp.Column):
            if column.parts[0].name in lambda_variables:
                dot_or_id = column.to_dot() if column.table else column.this
                parent = column.parent

                while isinstance(parent, exp.Dot):
                    if not isinstance(parent.parent, exp.Dot):
                        parent.replace(dot_or_id)
                        break
                    parent = parent.parent
                else:
                    if column is node:
                        node = dot_or_id
                    else:
                        column.replace(dot_or_id)
        return node
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The number of characters of context to capture from the query string when displaying an error message. Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
847 def __init__( 848 self, 849 error_level: t.Optional[ErrorLevel] = None, 850 error_message_context: int = 100, 851 max_errors: int = 3, 852 ): 853 self.error_level = error_level or ErrorLevel.IMMEDIATE 854 self.error_message_context = error_message_context 855 self.max_errors = max_errors 856 self.reset()
868 def parse( 869 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 870 ) -> t.List[t.Optional[exp.Expression]]: 871 """ 872 Parses a list of tokens and returns a list of syntax trees, one tree 873 per parsed SQL statement. 874 875 Args: 876 raw_tokens: The list of tokens. 877 sql: The original SQL string, used to produce helpful debug messages. 878 879 Returns: 880 The list of the produced syntax trees. 881 """ 882 return self._parse( 883 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 884 )
Parses a list of tokens and returns a list of syntax trees, one tree per parsed SQL statement.
Arguments:
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The list of syntax trees produced, one per parsed SQL statement.
886 def parse_into( 887 self, 888 expression_types: exp.IntoType, 889 raw_tokens: t.List[Token], 890 sql: t.Optional[str] = None, 891 ) -> t.List[t.Optional[exp.Expression]]: 892 """ 893 Parses a list of tokens into a given Expression type. If a collection of Expression 894 types is given instead, this method will try to parse the token list into each one 895 of them, stopping at the first for which the parsing succeeds. 896 897 Args: 898 expression_types: The expression type(s) to try and parse the token list into. 899 raw_tokens: The list of tokens. 900 sql: The original SQL string, used to produce helpful debug messages. 901 902 Returns: 903 The target Expression. 904 """ 905 errors = [] 906 for expression_type in ensure_list(expression_types): 907 parser = self.EXPRESSION_PARSERS.get(expression_type) 908 if not parser: 909 raise TypeError(f"No parser registered for {expression_type}") 910 911 try: 912 return self._parse(parser, raw_tokens, sql) 913 except ParseError as e: 914 e.errors[0]["into_expression"] = expression_type 915 errors.append(e) 916 917 raise ParseError( 918 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 919 errors=merge_errors(errors), 920 ) from errors[-1]
Parses a list of tokens into a given Expression type. If a collection of Expression types is given instead, this method will try to parse the token list into each one of them, stopping at the first for which the parsing succeeds.
Arguments:
- expression_types: The expression type(s) to try and parse the token list into.
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The target Expression.
957 def check_errors(self) -> None: 958 """Logs or raises any found errors, depending on the chosen error level setting.""" 959 if self.error_level == ErrorLevel.WARN: 960 for error in self.errors: 961 logger.error(str(error)) 962 elif self.error_level == ErrorLevel.RAISE and self.errors: 963 raise ParseError( 964 concat_messages(self.errors, self.max_errors), 965 errors=merge_errors(self.errors), 966 )
Logs or raises any found errors, depending on the chosen error level setting.
968 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 969 """ 970 Appends an error in the list of recorded errors or raises it, depending on the chosen 971 error level setting. 972 """ 973 token = token or self._curr or self._prev or Token.string("") 974 start = token.start 975 end = token.end + 1 976 start_context = self.sql[max(start - self.error_message_context, 0) : start] 977 highlight = self.sql[start:end] 978 end_context = self.sql[end : end + self.error_message_context] 979 980 error = ParseError.new( 981 f"{message}. Line {token.line}, Col: {token.col}.\n" 982 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 983 description=message, 984 line=token.line, 985 col=token.col, 986 start_context=start_context, 987 highlight=highlight, 988 end_context=end_context, 989 ) 990 991 if self.error_level == ErrorLevel.IMMEDIATE: 992 raise error 993 994 self.errors.append(error)
Appends an error in the list of recorded errors or raises it, depending on the chosen error level setting.
996 def expression( 997 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 998 ) -> E: 999 """ 1000 Creates a new, validated Expression. 1001 1002 Args: 1003 exp_class: The expression class to instantiate. 1004 comments: An optional list of comments to attach to the expression. 1005 kwargs: The arguments to set for the expression along with their respective values. 1006 1007 Returns: 1008 The target expression. 1009 """ 1010 instance = exp_class(**kwargs) 1011 instance.add_comments(comments) if comments else self._add_comments(instance) 1012 return self.validate_expression(instance)
Creates a new, validated Expression.
Arguments:
- exp_class: The expression class to instantiate.
- comments: An optional list of comments to attach to the expression.
- kwargs: The arguments to set for the expression along with their respective values.
Returns:
The target expression.
1019 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1020 """ 1021 Validates an Expression, making sure that all its mandatory arguments are set. 1022 1023 Args: 1024 expression: The expression to validate. 1025 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1026 1027 Returns: 1028 The validated expression. 1029 """ 1030 if self.error_level != ErrorLevel.IGNORE: 1031 for error_message in expression.error_messages(args): 1032 self.raise_error(error_message) 1033 1034 return expression
Validates an Expression, making sure that all its mandatory arguments are set.
Arguments:
- expression: The expression to validate.
- args: An optional list of items that was used to instantiate the expression, if it's a Func.
Returns:
The validated expression.